diff --git a/.github/workflows/cron-mmar.yml b/.github/workflows/cron-mmar.yml
new file mode 100644
index 0000000000..735c23117c
--- /dev/null
+++ b/.github/workflows/cron-mmar.yml
@@ -0,0 +1,42 @@
+name: cron-mmar
+
+on:
+  schedule:
+    - cron: "0 2 * * *"  # at 02:00 UTC
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+concurrency:
+  # automatically cancel the previously triggered workflows when there's a newer version
+  group: mmar-tests-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  cron-load:
+    if: github.repository == 'Project-MONAI/MONAI'
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python 3.8
+      uses: actions/setup-python@v2
+      with:
+        python-version: 3.8
+    - name: cache weekly timestamp
+      id: pip-cache
+      run: echo "::set-output name=datew::$(date '+%Y-%V')"
+    - name: cache for pip
+      uses: actions/cache@v2
+      id: cache
+      with:
+        path: ~/.cache/pip
+        key: ${{ runner.os }}-pip-${{ steps.pip-cache.outputs.datew }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip wheel
+        python -m pip install -r requirements-dev.txt
+    - name: Loading MMARs
+      run: |
+        # clean up temporary files
+        $(pwd)/runtests.sh --clean
+        # run tests
+        python -m tests.ngc_mmar_loading
diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml
index a36cfbcdb9..77d43d35bd 100644
--- a/.github/workflows/cron.yml
+++ b/.github/workflows/cron.yml
@@ -215,7 +215,7 @@ jobs:
         which python
         python -m pip install --upgrade pip wheel
         python -m pip install -r requirements-dev.txt
-        BUILD_MONAI=0 python setup.py develop  # install monai
+        BUILD_MONAI=1 python setup.py develop  # install monai
         nvidia-smi
         export CUDA_VISIBLE_DEVICES=$(python -m tests.utils)
         echo $CUDA_VISIBLE_DEVICES
@@ -234,5 +234,7 @@ jobs:
         trap 'if pgrep python; then pkill python; fi;' ERR
         python -c $'import torch\na,b=torch.zeros(1,device="cuda:0"),torch.zeros(1,device="cuda:1");\nwhile True:print(a,b)' > /dev/null &
         cd /opt/tutorials
+        python -c 'import monai; monai.config.print_debug_info()'
         $(pwd)/runner.sh
+        python -c 'import monai; monai.config.print_debug_info()'
         if pgrep python; then pkill python; fi
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index ed025e98fe..3a70983137 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -34,7 +34,7 @@ jobs:
         which python
         python -m pip install --upgrade pip wheel
         python -m pip uninstall -y torch torchvision
-        python -m pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html
+        python -m pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html
         python -m pip install -r requirements-dev.txt
     - name: Run integration tests
       run: |
diff --git a/.github/workflows/pythonapp-gpu.yml b/.github/workflows/pythonapp-gpu.yml
index 999567ae16..edaa2487ce 100644
--- a/.github/workflows/pythonapp-gpu.yml
+++ b/.github/workflows/pythonapp-gpu.yml
@@ -47,7 +47,7 @@ jobs:
             pytorch: "-h"
             base: "nvcr.io/nvidia/pytorch:21.08-py3"
           - environment: PT19+CUDA102
-            pytorch: "torch==1.9.0 torchvision==0.10.0"
+            pytorch: "torch==1.9.1 torchvision==0.10.1"
             base: "nvcr.io/nvidia/cuda:10.2-devel-ubuntu18.04"
     container:
       image: ${{ matrix.base }}
@@ -100,6 +100,8 @@ jobs:
       run: |
         which python
         python -m pip install --upgrade pip wheel
+        # fixes preinstalled ruamel_yaml error from the docker image
+        rm -rf $(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")/ruamel*
         python -m pip install ${{ matrix.pytorch }}
         python -m pip install -r requirements-dev.txt
         python -m pip list
diff --git a/.github/workflows/pythonapp.yml b/.github/workflows/pythonapp.yml
index 3f18263e9e..28a9f34839 100644
--- a/.github/workflows/pythonapp.yml
+++ b/.github/workflows/pythonapp.yml
@@ -87,10 +87,10 @@ jobs:
     - if: runner.os == 'windows'
       name: Install torch cpu from pytorch.org (Windows only)
       run: |
-        python -m pip install torch==1.9.0+cpu torchvision==0.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+        python -m pip install torch==1.9.1+cpu torchvision==0.10.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
     - name: Install the dependencies
       run: |
-        python -m pip install torch==1.9.0 torchvision==0.10.0
+        python -m pip install torch==1.9.1 torchvision==0.10.1
         cat "requirements-dev.txt"
         python -m pip install -r requirements-dev.txt
         python -m pip list
@@ -138,11 +138,11 @@ jobs:
     - if: runner.os == 'windows'
       name: Install torch cpu from pytorch.org (Windows only)
       run: |
-        python -m pip install torch==1.9.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+        python -m pip install torch==1.9.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
     - name: Install the dependencies
       run: |
         # min. requirements
-        python -m pip install torch==1.9.0
+        python -m pip install torch==1.9.1
         python -m pip install -r requirements-min.txt
         python -m pip list
         BUILD_MONAI=0 python setup.py develop  # no compile of extensions
@@ -152,6 +152,7 @@ jobs:
         python -c 'import torch; print(torch.__version__); print(torch.rand(5,3))'
         python -c "import monai; monai.config.print_config()"
         ./runtests.sh --min
+      shell: bash
       env:
         QUICKTEST: True
 
@@ -187,7 +188,7 @@ jobs:
     - name: Install the dependencies
       run: |
         # min. requirements
-        python -m pip install torch==1.9.0
+        python -m pip install torch==1.9.1
         python -m pip install -r requirements-min.txt
         python -m pip list
         BUILD_MONAI=0 python setup.py develop  # no compile of extensions
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index bfdc639788..0214ae4cf6 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -144,6 +144,17 @@ jobs:
         run: |
           # get tag info for versioning
           mv _version.py monai/
+          # version checks
+          target="\"version\": \"$RELEASE_VERSION\""
+          echo $target
+          local=`grep "\"version\"" monai/_version.py
+          echo $local
+          if [ "$local" = "$target" ]; then
+            echo "matched version string"
+          else
+            echo "unmatched version string, please check the main branch"
+            exit 1
+          fi
           # remove flake package as it is not needed on hub.docker.com
           sed -i '/flake/d' requirements-dev.txt
           docker build -t projectmonai/monai:"$RELEASE_VERSION" -f Dockerfile .
diff --git a/.github/workflows/weekly-preview.yml b/.github/workflows/weekly-preview.yml
index df0b5dd759..d5c4e5ae05 100644
--- a/.github/workflows/weekly-preview.yml
+++ b/.github/workflows/weekly-preview.yml
@@ -33,7 +33,7 @@ jobs:
         export YEAR_WEEK=$(date +'%y%U')
         echo "Year week for tag is ${YEAR_WEEK}"
         if ! [[ $YEAR_WEEK =~ ^[0-9]{4}$ ]] ; then echo "Wrong 'year week' format.  Should be 4 digits."; exit 1 ; fi
-        git tag "0.7.dev${YEAR_WEEK}"
+        git tag "0.8.dev${YEAR_WEEK}"
         git log -1
         git tag --list
         python setup.py sdist bdist_wheel
diff --git a/.gitignore b/.gitignore
index 7444d7f2f9..13155c3088 100644
--- a/.gitignore
+++ b/.gitignore
@@ -135,3 +135,4 @@ tests/testing_data/*.tiff
 
 # VSCode
 .vscode/
+*.zip
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c36c96186c..970158194b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,18 +22,30 @@ repos:
         args: ['--maxkb=1024']
       - id: detect-private-key
 
-  #- repo: https://github.com/asottile/pyupgrade
-  #  rev: v2.23.2
-  #  hooks:
-  #    - id: pyupgrade
-  #      args: [--py36-plus]
-  #      name: Upgrade code
+  - repo: https://github.com/asottile/pyupgrade
+    rev: v2.29.0
+    hooks:
+      - id: pyupgrade
+        args: [--py36-plus]
+        name: Upgrade code
+        exclude: |
+          (?x)^(
+              versioneer.py|
+              monai/_version.py
+          )$
 
-  #- repo: https://github.com/asottile/yesqa
-  #  rev: v1.2.3
-  #  hooks:
-  #    - id: yesqa
-  #      name: Unused noqa
+  - repo: https://github.com/asottile/yesqa
+    rev: v1.2.3
+    hooks:
+      - id: yesqa
+        name: Unused noqa
+        additional_dependencies:
+          - flake8>=3.8.1
+          - flake8-bugbear
+          - flake8-comprehensions
+          - flake8-executable
+          - flake8-pyi
+          - pep8-naming
 
   #- repo: https://github.com/PyCQA/isort
   #  rev: 5.9.3
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bdbd23e7dd..7dea15cd0a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,50 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
-* renamed model's `n_classes` to `num_classes`
+
+## [0.7.0] - 2021-09-24
+### Added
+* Overview of [new features in v0.7](docs/source/whatsnew_0_7.md)
+* Initial phase of major usability improvements in `monai.transforms` to support input and backend in PyTorch and NumPy
+* Performance enhancements, with [profiling and tuning guides](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_model_training_guide.md) for typical use cases
+* Reproducing [training modules and workflows](https://github.com/Project-MONAI/tutorials/tree/master/kaggle/RANZCR/4th_place_solution) of state-of-the-art Kaggle competition solutions
+* 24 new transforms, including
+  * `OneOf` meta transform
+  * DeepEdit guidance signal transforms for interactive segmentation
+  * Transforms for self-supervised pre-training
+  * Integration of [NVIDIA Tools Extension](https://developer.nvidia.com/blog/nvidia-tools-extension-api-nvtx-annotation-tool-for-profiling-code-in-python-and-c-c/) (NVTX)
+  * Integration of [cuCIM](https://github.com/rapidsai/cucim)
+  * Stain normalization and contextual grid for digital pathology
+* `Transchex` network for vision-language transformers for chest X-ray analysis
+* `DatasetSummary` utility in `monai.data`
+* `WarmupCosineSchedule`
+* Deprecation warnings and documentation support for better backwards compatibility
+* Padding with additional `kwargs` and different backend API
+* Additional options such as `dropout` and `norm` in various networks and their submodules
+
+### Changed
+* Base Docker image upgraded to `nvcr.io/nvidia/pytorch:21.08-py3` from `nvcr.io/nvidia/pytorch:21.06-py3`
+* Deprecated input argument `n_classes`, in favor of `num_classes`
+* Deprecated input argument `dimensions` and `ndims`, in favor of `spatial_dims`
+* Updated the Sphinx-based documentation theme for better readability
+* `NdarrayTensor` type is replaced by `NdarrayOrTensor` for simpler annotations
+* Self-attention-based network blocks now support both 2D and 3D inputs
+
+### Removed
+* The deprecated `TransformInverter`, in favor of `monai.transforms.InvertD`
+* GitHub self-hosted CI/CD pipelines for nightly and post-merge tests
+* `monai.handlers.utils.evenly_divisible_all_gather`
+* `monai.handlers.utils.string_list_all_gather`
+
+### Fixed
+* A Multi-thread cache writing issue in `LMDBDataset`
+* Output shape convention inconsistencies of the image readers
+* Output directory and file name flexibility issue for `NiftiSaver`, `PNGSaver`
+* Requirement of the `label` field in test-time augmentation
+* Input argument flexibility issues for  `ThreadDataLoader`
+* Decoupled `Dice` and `CrossEntropy` intermediate results in `DiceCELoss`
+* Improved documentation, code examples, and warning messages in various modules
+* Various usability issues reported by users
 
 ## [0.6.0] - 2021-07-08
 ### Added
@@ -25,6 +68,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 * Fully compatible with PyTorch 1.9
 * `--disttests` and `--min` options for `runtests.sh`
 * Initial support of pre-merge tests with Nvidia Blossom system
+
 ### Changed
 * Base Docker image upgraded to `nvcr.io/nvidia/pytorch:21.06-py3` from
   `nvcr.io/nvidia/pytorch:21.04-py3`
@@ -34,11 +78,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 * Unified the terms: `post_transform` is renamed to `postprocessing`, `pre_transform` is renamed to `preprocessing`
 * Unified the postprocessing transforms and event handlers to accept the "channel-first" data format
 * `evenly_divisible_all_gather` and `string_list_all_gather` moved to `monai.utils.dist`
+
 ### Removed
 * Support of 'batched' input for postprocessing transforms and event handlers
 * `TorchVisionFullyConvModel`
 * `set_visible_devices` utility function
 * `SegmentationSaver` and `TransformsInverter` handlers
+
 ### Fixed
 * Issue of handling big-endian image headers
 * Multi-thread issue for non-random transforms in the cache-based datasets
@@ -269,9 +315,11 @@ the postprocessing steps should be used before calling the metrics methods
 * Optionally depend on PyTorch-Ignite v0.4.2 instead of v0.3.0
 * Optionally depend on torchvision, ITK
 * Enhanced CI tests with 8 new testing environments
+
 ### Removed
 * `MONAI/examples` folder (relocated into [`Project-MONAI/tutorials`](https://github.com/Project-MONAI/tutorials))
 * `MONAI/research` folder (relocated to [`Project-MONAI/research-contributions`](https://github.com/Project-MONAI/research-contributions))
+
 ### Fixed
 * `dense_patch_slices` incorrect indexing
 * Data type issue in `GeneralizedWassersteinDiceLoss`
@@ -302,6 +350,7 @@ the postprocessing steps should be used before calling the metrics methods
 * Cross-platform CI tests supporting multiple Python versions
 * Optional import mechanism
 * Experimental features for third-party transforms integration
+
 ### Changed
 > For more details please visit [the project wiki](https://github.com/Project-MONAI/MONAI/wiki/Notable-changes-between-0.1.0-and-0.2.0)
 * Core modules now require numpy >= 1.17
@@ -311,9 +360,11 @@ the postprocessing steps should be used before calling the metrics methods
 * Base Docker image upgraded to `nvcr.io/nvidia/pytorch:20.03-py3` from `nvcr.io/nvidia/pytorch:19.10-py3`
 * Enhanced local testing tools
 * Documentation website domain changed to https://docs.monai.io
+
 ### Removed
 * Support of Python < 3.6
 * Automatic installation of optional dependencies including pytorch-ignite, nibabel, tensorboard, pillow, scipy, scikit-image
+
 ### Fixed
 * Various issues in type and argument names consistency
 * Various issues in docstring and documentation site
@@ -336,7 +387,8 @@ the postprocessing steps should be used before calling the metrics methods
 
 [highlights]: https://github.com/Project-MONAI/MONAI/blob/master/docs/source/highlights.md
 
-[Unreleased]: https://github.com/Project-MONAI/MONAI/compare/0.6.0...HEAD
+[Unreleased]: https://github.com/Project-MONAI/MONAI/compare/0.7.0...HEAD
+[0.7.0]: https://github.com/Project-MONAI/MONAI/compare/0.6.0...0.7.0
 [0.6.0]: https://github.com/Project-MONAI/MONAI/compare/0.5.3...0.6.0
 [0.5.3]: https://github.com/Project-MONAI/MONAI/compare/0.5.0...0.5.3
 [0.5.0]: https://github.com/Project-MONAI/MONAI/compare/0.4.0...0.5.0
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 0dce26582a..954549581a 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -289,9 +289,9 @@ When major features are ready for a milestone, to prepare for a new release:
   repository's artifacts (e.g. the file at https://github.com/Project-MONAI/MONAI/actions/runs/66570977).
 - Check the release test at [TestPyPI](https://test.pypi.org/project/monai/), download the artifacts when the CI finishes.
 - Optionally run [the cron testing jobs](https://github.com/Project-MONAI/MONAI/blob/dev/.github/workflows/cron.yml) on `releasing/[version number]`.
+- Rebase `releasing/[version number]` to `main`, make sure all the test pipelines succeed.
 - Once the release candidate is verified, tag and push a milestone, for example, `git push origin 0.1.0`.
   The tag must be with the latest commit of `releasing/[version number]`.
-- Rebase `releasing/[version number]` to `main`, make sure all the test pipelines succeed.
 - Upload the packages to [PyPI](https://pypi.org/project/monai/).
   This could be done manually by ``twine upload dist/*``, given the artifacts are unzipped to the folder ``dist/``.
 - Merge `releasing/[version number]` to `dev`, this step must make sure that the tagging commit unchanged on `dev`.
diff --git a/README.md b/README.md
index e9facef64d..e08b1d07a8 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ Its ambitions are:
 
 ## Features
 > _The codebase is currently under active development._
-> _Please see [the technical highlights](https://docs.monai.io/en/latest/highlights.html) and [What's New in 0.6](https://docs.monai.io/en/latest/whatsnew_0_6.html) of the current milestone release._
+> _Please see [the technical highlights](https://docs.monai.io/en/latest/highlights.html) and [What's New](https://docs.monai.io/en/latest/whatsnew.html) of the current milestone release._
 
 - flexible pre-processing for multi-dimensional medical imaging data;
 - compositional & portable APIs for ease of integration in existing workflows;
diff --git a/docs/images/fast_training.png b/docs/images/fast_training.png
index d0584b9dac..34e47bcb21 100644
Binary files a/docs/images/fast_training.png and b/docs/images/fast_training.png differ
diff --git a/docs/images/nsight_comparison.png b/docs/images/nsight_comparison.png
new file mode 100644
index 0000000000..9b91826513
Binary files /dev/null and b/docs/images/nsight_comparison.png differ
diff --git a/docs/images/threaddataloader.png b/docs/images/threaddataloader.png
new file mode 100644
index 0000000000..565df8d0d4
Binary files /dev/null and b/docs/images/threaddataloader.png differ
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 00dd4d2c1e..53eb6d3c0d 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,6 +1,6 @@
 -f https://download.pytorch.org/whl/cpu/torch-1.6.0%2Bcpu-cp37-cp37m-linux_x86_64.whl
 torch>=1.5
-pytorch-ignite==0.4.5
+pytorch-ignite==0.4.6
 numpy>=1.17
 itk>=5.2
 nibabel
@@ -20,3 +20,4 @@ sphinxcontrib-serializinghtml
 sphinx-autodoc-typehints==1.11.1
 pandas
 einops
+transformers
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 324be8a0fd..46c905f99c 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -18,7 +18,7 @@
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
 print(sys.path)
 
-import monai  # noqa: E402
+import monai
 
 # -- Project information -----------------------------------------------------
 project = "MONAI"
diff --git a/docs/source/handlers.rst b/docs/source/handlers.rst
index 5caccc6b4b..cb4333d1da 100644
--- a/docs/source/handlers.rst
+++ b/docs/source/handlers.rst
@@ -150,11 +150,6 @@ GarbageCollector handler
 .. autoclass:: GarbageCollector
     :members:
 
-Transform inverter
-------------------
-.. autoclass:: TransformInverter
-    :members:
-
 Post processing
 ---------------
 .. autoclass:: PostProcessing
diff --git a/docs/source/highlights.md b/docs/source/highlights.md
index 141c0846d1..2db79b4821 100644
--- a/docs/source/highlights.md
+++ b/docs/source/highlights.md
@@ -16,7 +16,7 @@ The overall architecture and modules are shown in the following figure:
 The rest of this page provides more details for each module.
 
 * [Data I/O, processing and augmentation](#medical-image-data-i-o-processing-and-augmentation)
-* [Datasets](#datasets)
+* [Datasets and DataLoader](#datasets-and-dataloader)
 * [Loss functions](#losses)
 * [Optimizers](#optimizers)
 * [Network architectures](#network-architectures)
@@ -25,7 +25,7 @@ The rest of this page provides more details for each module.
 * [Result writing](#result-writing)
 * [Workflows](#workflows)
 * [Research](#research)
-* [GPU acceleration](#gpu-acceleration)
+* [Performance optimization and GPU acceleration](#performance-optimization-and-gpu-acceleration)
 * [Applications](#applications)
 
 ## Medical image data I/O, processing and augmentation
@@ -56,8 +56,15 @@ transformations. These currently include, for example:
 [2D transforms tutorial](https://github.com/Project-MONAI/tutorials/blob/master/modules/transforms_demo_2d.ipynb) shows the detailed usage of several MONAI medical image specific transforms.
 ![2d transform examples](../images/medical_transforms.png)
 
-### 3. Fused spatial transforms and GPU acceleration
-As medical image volumes are usually large (in multi-dimensional arrays), pre-processing performance affects the overall pipeline speed. MONAI provides affine transforms to execute fused spatial operations, supports GPU acceleration via native PyTorch for high performance.
+
+### 3. Transforms support both NumPy array and PyTorch Tensor (CPU or GPU accelerated)
+From MONAI v0.7 we introduced PyTorch `Tensor` based computation in transforms, many transforms already support both `NumPy array` and `Tensor` as input types and computational backends. To get the supported backends of every transform, please execute: `python monai/transforms/utils.py`.
+
+To accelerate the transforms, a common approach is to leverage GPU parallel-computation. Users can first convert input data into GPU Tensor by `ToTensor` or `EnsureType` transform, then the following transforms can execute on GPU based on PyTorch `Tensor` APIs.
+GPU transform tutorial is available at [Spleen fast training tutorial](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_training_tutorial.ipynb).
+
+### 4. Fused spatial transforms
+As medical image volumes are usually large (in multi-dimensional arrays), pre-processing performance affects the overall pipeline speed. MONAI provides affine transforms to execute fused spatial operations.
 
 For example:
 ```py
@@ -67,20 +74,21 @@ affine = Affine(
     scale_params=(1.2, 1.2),
     translate_params=(200, 40),
     padding_mode='zeros',
-    device=torch.device('cuda:0')
 )
 # convert the image using bilinear interpolation
 new_img = affine(image, spatial_size=(300, 400), mode='bilinear')
 ```
 Experiments and test results are available at [Fused transforms test](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/transform_speed.ipynb).
 
-Currently, all the geometric image transforms (Spacing, Zoom, Rotate, Resize, etc.) are designed based on the PyTorch native interfaces. [Geometric transforms tutorial](https://github.com/Project-MONAI/tutorials/blob/master/modules/3d_image_transforms.ipynb) indicates the usage of affine transforms with 3D medical images.
+Currently, all the geometric image transforms (Spacing, Zoom, Rotate, Resize, etc.) are designed based on the PyTorch native interfaces. So all of them support GPU acceleration via `GPU Tensor` operations for high performance.
+
+[Geometric transforms tutorial](https://github.com/Project-MONAI/tutorials/blob/master/modules/3d_image_transforms.ipynb) indicates the usage of affine transforms with 3D medical images.
 ![3d transform examples](../images/affine.png)
 
-### 4. Randomly crop out batch images based on positive/negative ratio
+### 5. Randomly crop out batch images based on positive/negative ratio
 Medical image data volume may be too large to fit into GPU memory. A widely-used approach is to randomly draw small size data samples during training and run a “sliding window” routine for inference.  MONAI currently provides general random sampling strategies including class-balanced fixed ratio sampling which may help stabilize the patch-based training process. A typical example is in [Spleen 3D segmentation tutorial](https://github.com/Project-MONAI/tutorials/blob/master/3d_segmentation/spleen_segmentation_3d.ipynb), which achieves the class-balanced sampling with `RandCropByPosNegLabel` transform.
 
-### 5. Deterministic training for reproducibility
+### 6. Deterministic training for reproducibility
 Deterministic training support is necessary and important for deep learning research, especially in the medical field. Users can easily set the random seed to all the random transforms in MONAI locally and will not affect other non-deterministic modules in the user's program.
 
 For example:
@@ -99,16 +107,16 @@ Users can also enable/disable deterministic at the beginning of training program
 monai.utils.set_determinism(seed=0, additional_settings=None)
 ```
 
-### 6. Multiple transform chains
+### 7. Multiple transform chains
 To apply different transforms on the same data and concatenate the results, MONAI provides `CopyItems` transform to make copies of specified items in the data dictionary and `ConcatItems` transform to combine specified items on the expected dimension, and also provides `DeleteItems` transform to delete unnecessary items to save memory.
 
 Typical usage is to scale the intensity of the same image into different ranges and concatenate the results together.
 ![multiple transform chains](../images/multi_transform_chains.png)
 
-### 7. Debug transforms with DataStats
+### 8. Debug transforms with DataStats
 When transforms are combined with the "compose" function, it's not easy to track the output of a specific transform. To help debug errors in the composed transforms, MONAI provides utility transforms such as `DataStats` to print out intermediate data properties such as `data shape`, `value range`, `data value`, `Additional information`, etc. It's a self-contained transform and can be integrated into any transform chain.
 
-### 8. Post-processing transforms for model output
+### 9. Post-processing transforms for model output
 MONAI also provides post-processing transforms for handling the model outputs. Currently, the transforms include:
 - Adding an activation layer (Sigmoid, Softmax, etc.).
 - Converting to discrete values (Argmax, One-Hot, Threshold value, etc), as below figure (b).
@@ -119,12 +127,19 @@ MONAI also provides post-processing transforms for handling the model outputs. C
 After decollating the batch data of model output and applying the post-processing transforms, it's easier to compute metrics, save model output into files or visualize data in the TensorBoard. [Postprocessing transforms tutorial](https://github.com/Project-MONAI/tutorials/blob/master/modules/postprocessing_transforms.ipynb) shows an example with several main transforms for post-processing.
 ![post-processing transforms](../images/postprocessing_transforms.png)
 
-### 9. Integrate third-party transforms
+### 10. Integrate third-party transforms
 The design of MONAI transforms emphasis code readability and usability. It works for array data or dictionary-based data. MONAI also provides `Adaptor` tools to accommodate different data format for 3rd party transforms. To convert the data shapes or types, utility transforms such as `ToTensor`, `ToNumpy`, `SqueezeDim` are also provided. So it's easy to enhance the transform chain by seamlessly integrating transforms from external packages, including: `ITK`, `BatchGenerator`, `TorchIO` and `Rising`.
 
 For more details, please check out the tutorial: [integrate 3rd party transforms into MONAI program](https://github.com/Project-MONAI/tutorials/blob/master/modules/integrate_3rd_party_transforms.ipynb).
 
-### 10. IO factory for medical image formats
+In digital pathology training, due to the immense burden of loading images, the CPU is preoccupied by loading images and cannot catch up with preparing the data. This causes the pipeline to become IO bound and results in under-utilization of GPU. To overcome this bottleneck, [cuCIM](https://github.com/rapidsai/cucim) has implemented an optimized version of several common transforms that we are using in digital pathology pipeline. These transforms are natively being run on GPU and act on CuPy arrays. MONAI provides `CuCIM` and `RandCuCIM` adapters to integrate the `cuCIM` library. For instance:
+```py
+RandCuCIM(name="color_jitter", brightness=64.0 / 255.0, contrast=0.75, saturation=0.25, hue=0.04)
+CuCIM(name="scale_intensity_range", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0)
+```
+It has shown a significant speed up in pathology training metastasis detection model.
+
+### 11. IO factory for medical image formats
 Many popular image formats exist in the medical domain, and they are quite different with rich metadata information. To easily handle different medical image formats in the same pipeline, [MONAI provides `LoadImage` transform](https://github.com/Project-MONAI/tutorials/blob/master/modules/load_medical_images.ipynb), which can automatically choose image readers based on the supported suffixes and in the following priority order:
 - User-specified reader at runtime when calling this loader.
 - Registered readers from the latest to the first in the list.
@@ -134,13 +149,13 @@ The `ImageReader` API is quite straightforward, users can easily extend it for t
 
 With these pre-defined image readers, MONAI can load images in formats: `NIfTI`, `DICOM`, `PNG`, `JPG`, `BMP`, `NPY/NPZ`, etc.
 
-### 11. Save transform data into NIfTI or PNG files
+### 12. Save transform data into NIfTI or PNG files
 To convert images into files or debug the transform chain, MONAI provides `SaveImage` transform. Users can inject this transform into the transform chain to save the results.
 
-### 12. Automatically ensure `channel-first` data shape
+### 13. Automatically ensure `channel-first` data shape
 Medical images have different shape formats. They can be `channel-last`, `channel-first` or even `no-channel`. We may, for example, want to load several `no-channel` images and stack them as `channel-first` data. To improve the user experience, MONAI provided an `EnsureChannelFirst` transform to automatically detect data shape according to the meta information and convert it to the `channel-first` format consistently.
 
-### 13. Invert spatial transforms and test-time augmentations
+### 14. Invert spatial transforms and test-time augmentations
 It is often desirable to invert the previously applied spatial transforms (resize, flip, rotate, zoom, crop, pad, etc.) within the deep learning workflows, for example, to resume to the original imaging space after processing the image data in a normalized data space.  Many spatial transforms are enhanced with an `inverse` operation since in v0.5. The [model inference tutorial](https://github.com/Project-MONAI/tutorials/blob/master/3d_segmentation/torch/unet_inference_dict.py) shows a basic example.
 
 If the pipeline includes random transformations, users may want to observe the effect that these transformations have on the output. The typical approach is that we pass the same input through the transforms multiple times with different random realizations. Then use the inverse transforms to move all the results to a common space, and calculate the metrics. MONAI provided `TestTimeAugmentation` for this feature, which by default will calculate the `mode`, `mean`, `standard deviation` and `volume variation coefficient`.
@@ -153,7 +168,7 @@ If the pipeline includes random transformations, users may want to observe the e
 (2) The TTA results of `mode`, `mean` and `standard deviation`:
 ![test time augmentation](../images/tta.png)
 
-## Datasets
+## Datasets and DataLoader
 ### 1. Cache IO and transforms data to accelerate training
 Users often need to train the model with many (potentially thousands of) epochs over the data to achieve the desired model quality. A native PyTorch implementation may repeatedly load data and run the same preprocessing steps for every epoch during training, which can be time-consuming and unnecessary, especially when the medical image volumes are large.
 
@@ -221,6 +236,11 @@ The `partition_dataset` utility in MONAI can perform different types of partitio
 CSV tables are often used in additional to image data to incorporate adjunct information, such as patient demographics, lab results, image acquisition parameters and other non-image data, MONAI provides `CSVDataset` to load CSV files and `CSVIterableDataset` to load large CSV files with scalable data access.
 In addition to the regular preprocessing transform while loading, it also supports multiple CSV files loading, joining tables, rows and columns selection and grouping. [CSVDatasets tutorial](https://github.com/Project-MONAI/tutorials/blob/master/modules/csv_datasets.ipynb) shows detailed usage examples.
 
+### 9. `ThreadDataLoader` vs. `DataLoader`
+If the transforms are light-weighted, especially when we cache all the data in RAM, the multiprocessing of PyTorch `DataLoader` may cause unnecessary IPC time and cause the drop of GPU utilization after every epoch. MONAI provides `ThreadDataLoader` which executes the transforms in a separate thread:
+![threaddataloader](../images/threaddataloader.png)
+a `ThreadDataLoader` example is available at [Spleen fast training tutorial](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_training_tutorial.ipynb).
+
 ## Losses
 There are domain-specific loss functions in the medical imaging research which are not typically used in generic computer vision tasks. As an important module of MONAI, these loss functions are implemented in PyTorch, such as `DiceLoss`, `GeneralizedDiceLoss`, `MaskedDiceLoss`, `TverskyLoss`, `FocalLoss`, `DiceCELoss`, and `DiceFocalLoss`, etc.
 
@@ -249,7 +269,7 @@ add_module('conv1', conv_type(in_channels, out_channels, kernel_size=1, bias=Fal
 ```
 
 ### 2. Implementation of generic 2D/3D networks
-And there are several 1D/2D/3D-compatible implementations of intermediate blocks and generic networks, such as UNet, DynUNet, DenseNet, GAN, AHNet, VNet, SENet(and SEResNet, SEResNeXt), SegResNet, EfficientNet, Attention-based networks. All the networks can support PyTorch serialization pipeline based on `torch.jit.script`.
+And there are several 1D/2D/3D-compatible implementations of intermediate blocks and generic networks, such as UNet, DynUNet, DenseNet, GAN, AHNet, VNet, SENet(and SEResNet, SEResNeXt), SegResNet, EfficientNet, Attention-based transformer networks. All the networks can support PyTorch serialization pipeline based on `torch.jit.script`.
 
 ### 3. Network adapter to finetune final layers
 Instead of training from scratch, we often leverage the existing models, and finetune the final layers of a network for new learning tasks. MONAI provides a `NetAdapter` to easily replace the last layer of a model by a convolutional layer or a fully-connected layer. A typical usage example is to adapt [Torchvision models trained with ImageNet](https://pytorch.org/vision/stable/models.html) for other learning tasks.
@@ -366,10 +386,15 @@ G. Wang, X. Liu, C. Li, Z. Xu, J. Ruan, H. Zhu, T. Meng, K. Li, N. Huang, S. Zha
 Wentao Zhu, Can Zhao, Wenqi Li, Holger Roth, Ziyue Xu, and Daguang Xu (2020) "LAMP: Large Deep Nets with Automated Model Parallelism for Image Segmentation." MICCAI 2020 (Early Accept, paper link: https://arxiv.org/abs/2006.12575)
 ![LAMP UNet](../images/unet-pipe.png)
 
-## GPU acceleration
+## Performance optimization and GPU acceleration
+Typically, model training is a time-consuming step during deep learning development, especially in medical imaging applications. Volumetric medical images are usually large (as multi-dimensional arrays) and the model training process can be complex. Even with powerful hardware (e.g. CPU/GPU with large RAM), it is not easy to fully leverage them to achieve high performance. MONAI provides a fast training guide to achieve the best performance: https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_model_training_guide.md.
+
 NVIDIA GPUs have been widely applied in many areas of deep learning training and evaluation, and the CUDA parallel computation shows obvious acceleration when comparing to traditional computation methods. To fully leverage GPU features, many popular mechanisms raised, like automatic mixed precision (AMP), distributed data parallel, etc. MONAI can support these features and provides rich examples.
 
-### 1. Auto mixed precision(AMP)
+### 1. Profiling the pipelines
+First of all, MONAI provides several methods based on `DLProf`, `Nsight`, `NVTX` and `NVML` for users to analyze their programs to identify the performance bottleneck. The analyses include operation-based GPU activity and overall GPU activity during model training. They will greatly help users manage computing bottlenecks and provide insights for the area to be improved for better computing efficiency. The detailed example is shown in the [performance profiling tutorial]( https://github.com/Project-MONAI/tutorials/blob/master/performance_profiling/profiling_train_base_nvtx.ipynb).
+
+### 2. Auto mixed precision(AMP)
 In 2017, NVIDIA researchers developed a methodology for mixed-precision training, which combined single-precision (FP32) with half-precision (e.g. FP16) format when training a network, and it achieved the same accuracy as FP32 training using the same hyperparameters.
 
 For the PyTorch 1.6 release, developers at NVIDIA and Facebook moved mixed precision functionality into PyTorch core as the AMP package, `torch.cuda.amp`.
@@ -379,16 +404,16 @@ MONAI workflows can easily set `amp=True/False` in `SupervisedTrainer` or `Super
 We also executed the same test program on NVIDIA A100 GPU with the same software environment, obtained faster results:
 ![amp a100 results](../images/amp_training_a100.png)
 More details is available at [AMP training tutorial](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/automatic_mixed_precision.ipynb).
-We also tried to combine AMP with `CacheDataset` and `Novograd` optimizer to achieve the fast training in MONAI, able to obtain approximately 12x speedup compared with a Pytorch native implementation when the training converges at a validation mean dice of 0.93. Benchmark for reference:
+We also tried to combine `AMP` with `CacheDataset`, `GPU cache`, `GPU transforms`, `ThreadDataLoader`, `DiceCE` loss function and `Novograd` optimizer to achieve the fast training in MONAI, able to obtain approximately `200x` speedup compared with a Pytorch native implementation when the training converges at a validation mean dice of `0.95`. Benchmark for reference:
 ![fast training results](../images/fast_training.png)
 More details is available at [Fast training tutorial](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_training_tutorial.ipynb).
 
-### 2. Distributed data parallel
+### 3. Distributed data parallel
 Distributed data parallel is an important feature of PyTorch to connect multiple GPU devices on single or multiple nodes to train or evaluate models. The distributed data parallel APIs of MONAI are compatible with native PyTorch distributed module, pytorch-ignite distributed module, Horovod, XLA, and the SLURM platform. MONAI provides demos for reference: train/evaluate with PyTorch DDP, train/evaluate with Horovod, train/evaluate with Ignite DDP, partition dataset and train with SmartCacheDataset, as well as a real world training example based on Decathlon challenge Task01 - Brain Tumor segmentation.  The demo contains distributed caching, training, and validation. We obtained performance benchmarks for reference (based on PyTorch 1.6, CUDA 11, NVIDIA V100 GPUs):
 
 ![distributed training results](../images/distributed_training.png)
 
-### 3. C++/CUDA optimized modules
+### 4. C++/CUDA optimized modules
 To further accelerate the domain-specific routines in the workflows, MONAI C++/CUDA implementation are introduced as extensions of the PyTorch native implementations.
 MONAI provides the modules using [the two ways of building C++ extensions from PyTorch](https://pytorch.org/tutorials/advanced/cpp_extension.html#custom-c-and-cuda-extensions):
 - via `setuptools`, for modules including `Resampler`, `Conditional random field (CRF)`, `Fast bilateral filtering using the permutohedral lattice`.
@@ -396,6 +421,26 @@ MONAI provides the modules using [the two ways of building C++ extensions from P
 The following figure shows results of MONAI's Gaussian mixture models applied to tissue and surgical tools segmentation:
 ![Gaussian mixture models as a postprocessing step](../images/gmm_feature_set_comparison_s.png)
 
+### 5. Cache IO and transforms data to GPU memory
+Even with `CacheDataset`, we usually need to copy the same data to GPU memory for GPU random transforms or network computation in every epoch. An efficient approach is to cache the data to GPU memory directly, then every epoch can start from GPU computation immediately.
+
+For example:
+```py
+train_transforms = [
+    LoadImaged(...),
+    AddChanneld(...),
+    Spacingd(...),
+    Orientationd(...),
+    ScaleIntensityRanged(...),
+    EnsureTyped(..., data_type="tensor"),
+    ToDeviced(..., device="cuda:0"),
+    RandCropByPosNegLabeld(...),
+]
+dataset = CacheDataset(..., transform=train_trans)
+```
+Here we convert to PyTorch `Tensor` with `EnsureTyped` transform and move data to GPU with `ToDeviced` transform. `CacheDataset` caches the transform results until `ToDeviced`, so it is in GPU memory. Then in every epoch, the program fetches cached data from GPU memory and only executes the random transform `RandCropByPosNegLabeld` on GPU directly.
+GPU caching example is available at [Spleen fast training tutorial](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_training_tutorial.ipynb).
+
 ## Applications
 The research area of medical image deep learning is expanding fast. To apply the latest achievements into applications, MONAI contains many application components to build end-to-end solutions or prototypes for other similar use cases.
 
@@ -417,3 +462,8 @@ Starting from v0.5.0, MONAI provides experimental features for building learning
 The following figure shows the registration of CT images acquired at different time points for a single patient using MONAI:
 
 ![3d registration](../images/3d_paired.png)
+
+### 4. Reproducing the state-of-the-art Kaggle competition solutions
+[A reimplementation](https://github.com/Project-MONAI/tutorials/tree/master/kaggle/RANZCR/4th_place_solution) of the 4th place solution of RANZCR CLiP - Catheter and Line Position Challenge in Kaggle: https://www.kaggle.com/c/ranzcr-clip-catheter-line-classification
+
+The original solution is produced by Team Watercooled, and the authors are Dieter (https://www.kaggle.com/christofhenkel) and Psi (https://www.kaggle.com/philippsinger).
diff --git a/docs/source/installation.md b/docs/source/installation.md
index 08ab109142..4bc4aa700a 100644
--- a/docs/source/installation.md
+++ b/docs/source/installation.md
@@ -174,9 +174,9 @@ Since MONAI v0.2.0, the extras syntax such as `pip install 'monai[nibabel]'` is
 
 - The options are
 ```
-[nibabel, skimage, pillow, tensorboard, gdown, ignite, torchvision, itk, tqdm, lmdb, psutil, cucim, openslide, pandas, einops]
+[nibabel, skimage, pillow, tensorboard, gdown, ignite, torchvision, itk, tqdm, lmdb, psutil, cucim, openslide, pandas, einops, transformers]
 ```
 which correspond to `nibabel`, `scikit-image`, `pillow`, `tensorboard`,
-`gdown`, `pytorch-ignite`, `torchvision`, `itk`, `tqdm`, `lmdb`, `psutil`, `cucim`, `openslide-python`, `pandas` and `einops`, respectively.
+`gdown`, `pytorch-ignite`, `torchvision`, `itk`, `tqdm`, `lmdb`, `psutil`, `cucim`, `openslide-python`, `pandas`, `einops`  and `transformers`, respectively.
 
 - `pip install 'monai[all]'` installs all the optional dependencies.
diff --git a/docs/source/networks.rst b/docs/source/networks.rst
index 54c2756535..36d62752d4 100644
--- a/docs/source/networks.rst
+++ b/docs/source/networks.rst
@@ -500,6 +500,11 @@ Nets
 .. autoclass:: Critic
   :members:
 
+`Transchex`
+~~~~~~~~~~~~~~~~
+.. autoclass:: Transchex
+  :members:
+
 `NetAdapter`
 ~~~~~~~~~~~~
 .. autoclass:: NetAdapter
diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst
index b8f57e0dbe..65c1f7efb6 100644
--- a/docs/source/transforms.rst
+++ b/docs/source/transforms.rst
@@ -53,80 +53,115 @@ Generic Interfaces
 .. autoclass:: Decollated
     :members:
 
+`OneOf`
+^^^^^^^
+.. autoclass:: OneOf
+    :members:
+
 Vanilla Transforms
 ------------------
 
 Crop and Pad
 ^^^^^^^^^^^^
 
+`Pad`
+"""""
+.. autoclass:: Pad
+    :members:
+    :special-members: __call__
+
 `SpatialPad`
 """"""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/SpatialPad.png
+    :alt: example of SpatialPad
 .. autoclass:: SpatialPad
     :members:
     :special-members: __call__
 
 `BorderPad`
 """""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/BorderPad.png
+    :alt: example of BorderPad
 .. autoclass:: BorderPad
     :members:
     :special-members: __call__
 
 `DivisiblePad`
 """"""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/DivisiblePad.png
+    :alt: example of DivisiblePad
 .. autoclass:: DivisiblePad
     :members:
     :special-members: __call__
 
 `SpatialCrop`
 """""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/SpatialCrop.png
+    :alt: example of SpatialCrop
 .. autoclass:: SpatialCrop
     :members:
     :special-members: __call__
 
 `CenterSpatialCrop`
 """""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/CenterSpatialCrop.png
+    :alt: example of CenterSpatialCrop
 .. autoclass:: CenterSpatialCrop
     :members:
     :special-members: __call__
 
 `RandSpatialCrop`
 """""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandSpatialCrop.png
+    :alt: example of RandSpatialCrop
 .. autoclass:: RandSpatialCrop
     :members:
     :special-members: __call__
 
 `RandSpatialCropSamples`
 """"""""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandSpatialCropSamples.png
+    :alt: example of RandSpatialCropSamples
 .. autoclass:: RandSpatialCropSamples
     :members:
     :special-members: __call__
 
 `CropForeground`
 """"""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/CropForeground.png
+    :alt: example of CropForeground
 .. autoclass:: CropForeground
     :members:
     :special-members: __call__
 
 `RandWeightedCrop`
 """"""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandWeightedCrop.png
+    :alt: example of RandWeightedCrop
 .. autoclass:: RandWeightedCrop
     :members:
     :special-members: __call__
 
 `RandCropByPosNegLabel`
 """""""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCropByPosNegLabel.png
+    :alt: example of RandCropByPosNegLabel
 .. autoclass:: RandCropByPosNegLabel
     :members:
     :special-members: __call__
 
 `RandCropByLabelClasses`
 """"""""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCropByLabelClasses.png
+    :alt: example of RandCropByLabelClasses
 .. autoclass:: RandCropByLabelClasses
     :members:
     :special-members: __call__
 
 `ResizeWithPadOrCrop`
 """""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ResizeWithPadOrCrop.png
+    :alt: example of ResizeWithPadOrCrop
 .. autoclass:: ResizeWithPadOrCrop
     :members:
     :special-members: __call__
@@ -139,12 +174,16 @@ Crop and Pad
 
 `RandScaleCrop`
 """""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandScaleCrop.png
+    :alt: example of RandScaleCrop
 .. autoclass:: RandScaleCrop
     :members:
     :special-members: __call__
 
 `CenterScaleCrop`
 """""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/CenterScaleCrop.png
+    :alt: example of CenterScaleCrop
 .. autoclass:: CenterScaleCrop
     :members:
     :special-members: __call__
@@ -154,90 +193,120 @@ Intensity
 
 `RandGaussianNoise`
 """""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGaussianNoise.png
+    :alt: example of RandGaussianNoise
 .. autoclass:: RandGaussianNoise
     :members:
     :special-members: __call__
 
 `ShiftIntensity`
 """"""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ShiftIntensity.png
+    :alt: example of ShiftIntensity
 .. autoclass:: ShiftIntensity
     :members:
     :special-members: __call__
 
 `RandShiftIntensity`
 """"""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandShiftIntensity.png
+    :alt: example of RandShiftIntensity
 .. autoclass:: RandShiftIntensity
     :members:
     :special-members: __call__
 
 `StdShiftIntensity`
 """""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/StdShiftIntensity.png
+    :alt: example of StdShiftIntensity
 .. autoclass:: StdShiftIntensity
     :members:
     :special-members: __call__
 
 `RandStdShiftIntensity`
 """""""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandStdShiftIntensity.png
+    :alt: example of RandStdShiftIntensity
 .. autoclass:: RandStdShiftIntensity
     :members:
     :special-members: __call__
 
 `RandBiasField`
 """""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandBiasField.png
+    :alt: example of RandBiasField
 .. autoclass:: RandBiasField
     :members:
     :special-members: __call__
 
 `ScaleIntensity`
 """"""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ScaleIntensity.png
+    :alt: example of ScaleIntensity
 .. autoclass:: ScaleIntensity
     :members:
     :special-members: __call__
 
 `RandScaleIntensity`
 """"""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandScaleIntensity.png
+    :alt: example of RandScaleIntensity
 .. autoclass:: RandScaleIntensity
     :members:
     :special-members: __call__
 
 `NormalizeIntensity`
 """"""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/NormalizeIntensity.png
+    :alt: example of NormalizeIntensity
 .. autoclass:: NormalizeIntensity
     :members:
     :special-members: __call__
 
 `ThresholdIntensity`
 """"""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ThresholdIntensity.png
+    :alt: example of ThresholdIntensity
 .. autoclass:: ThresholdIntensity
     :members:
     :special-members: __call__
 
 `ScaleIntensityRange`
 """""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ScaleIntensityRange.png
+    :alt: example of ScaleIntensityRange
 .. autoclass:: ScaleIntensityRange
     :members:
     :special-members: __call__
 
 `ScaleIntensityRangePercentiles`
 """"""""""""""""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ScaleIntensityRangePercentiles.png
+    :alt: example of ScaleIntensityRangePercentiles
 .. autoclass:: ScaleIntensityRangePercentiles
     :members:
     :special-members: __call__
 
 `AdjustContrast`
 """"""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/AdjustContrast.png
+    :alt: example of AdjustContrast
 .. autoclass:: AdjustContrast
     :members:
     :special-members: __call__
 
 `RandAdjustContrast`
 """"""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandAdjustContrast.png
+    :alt: example of RandAdjustContrast
 .. autoclass:: RandAdjustContrast
     :members:
     :special-members: __call__
 
 `MaskIntensity`
 """""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/MaskIntensity.png
+    :alt: example of MaskIntensity
 .. autoclass:: MaskIntensity
     :members:
     :special-members: __call__
@@ -250,30 +319,40 @@ Intensity
 
 `GaussianSmooth`
 """"""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/GaussianSmooth.png
+    :alt: example of GaussianSmooth
 .. autoclass:: GaussianSmooth
     :members:
     :special-members: __call__
 
 `RandGaussianSmooth`
 """"""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGaussianSmooth.png
+    :alt: example of RandGaussianSmooth
 .. autoclass:: RandGaussianSmooth
     :members:
     :special-members: __call__
 
 `GaussianSharpen`
 """""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/GaussianSharpen.png
+    :alt: example of GaussianSharpen
 .. autoclass:: GaussianSharpen
     :members:
     :special-members: __call__
 
 `RandGaussianSharpen`
 """""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGaussianSharpen.png
+    :alt: example of RandGaussianSharpen
 .. autoclass:: RandGaussianSharpen
     :members:
     :special-members: __call__
 
 `RandHistogramShift`
 """"""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandHistogramShift.png
+    :alt: example of RandHistogramShift
 .. autoclass:: RandHistogramShift
     :members:
     :special-members: __call__
@@ -286,43 +365,63 @@ Intensity
 
 `GibbsNoise`
 """"""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/GibbsNoise.png
+    :alt: example of GibbsNoise
 .. autoclass:: GibbsNoise
     :members:
     :special-members: __call__
 
 `RandGibbsNoise`
 """"""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGibbsNoise.png
+    :alt: example of RandGibbsNoise
 .. autoclass:: RandGibbsNoise
     :members:
     :special-members: __call__
 
 `KSpaceSpikeNoise`
 """"""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/KSpaceSpikeNoise.png
+    :alt: example of KSpaceSpikeNoise
 .. autoclass:: KSpaceSpikeNoise
     :members:
     :special-members: __call__
 
 `RandKSpaceSpikeNoise`
 """"""""""""""""""""""
- .. autoclass:: RandKSpaceSpikeNoise
-     :members:
-     :special-members: __call__
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandKSpaceSpikeNoise.png
+    :alt: example of RandKSpaceSpikeNoise
+.. autoclass:: RandKSpaceSpikeNoise
+    :members:
+    :special-members: __call__
+
+`RandCoarseTransform`
+"""""""""""""""""""""
+.. autoclass:: RandCoarseTransform
+    :members:
+    :special-members: __call__
 
 `RandCoarseDropout`
 """""""""""""""""""
- .. autoclass:: RandCoarseDropout
-     :members:
-     :special-members: __call__
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCoarseDropout.png
+    :alt: example of RandCoarseDropout
+.. autoclass:: RandCoarseDropout
+    :members:
+    :special-members: __call__
+
+`RandCoarseShuffle`
+"""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCoarseShuffle.png
+    :alt: example of RandCoarseShuffle
+.. autoclass:: RandCoarseShuffle
+    :members:
+    :special-members: __call__
 
 `HistogramNormalize`
 """"""""""""""""""""
- .. autoclass:: HistogramNormalize
-     :members:
-     :special-members: __call__
-
-`LocalPatchShuffling`
-"""""""""""""""""""""
-.. autoclass:: LocalPatchShuffling
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/HistogramNormalize.png
+    :alt: example of HistogramNormalize
+.. autoclass:: HistogramNormalize
     :members:
     :special-members: __call__
 
@@ -381,6 +480,8 @@ Post-processing
 
 `AsDiscrete`
 """"""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/AsDiscrete.png
+    :alt: example of AsDiscrete
 .. autoclass:: AsDiscrete
     :members:
     :special-members: __call__
@@ -393,6 +494,8 @@ Post-processing
 
 `LabelFilter`
 """""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/LabelFilter.png
+    :alt: example of LabelFilter
 .. autoclass:: LabelFilter
     :members:
     :special-members: __call__
@@ -405,6 +508,8 @@ Post-processing
 
 `LabelToContour`
 """"""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/LabelToContour.png
+    :alt: example of LabelToContour
 .. autoclass:: LabelToContour
     :members:
     :special-members: __call__
@@ -431,42 +536,56 @@ Spatial
 
 `Spacing`
 """""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Spacing.png
+    :alt: example of Spacing
 .. autoclass:: Spacing
     :members:
     :special-members: __call__
 
 `Orientation`
 """""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Orientation.png
+    :alt: example of Orientation
 .. autoclass:: Orientation
     :members:
     :special-members: __call__
 
 `RandRotate`
 """"""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandRotate.png
+    :alt: example of RandRotate
 .. autoclass:: RandRotate
     :members:
     :special-members: __call__
 
 `RandFlip`
 """"""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandFlip.png
+    :alt: example of RandFlip
 .. autoclass:: RandFlip
     :members:
     :special-members: __call__
 
 `RandAxisFlip`
 """"""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandAxisFlip.png
+    :alt: example of RandAxisFlip
 .. autoclass:: RandAxisFlip
     :members:
     :special-members: __call__
 
 `RandZoom`
 """"""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandZoom.png
+    :alt: example of RandZoom
 .. autoclass:: RandZoom
     :members:
     :special-members: __call__
 
 `Affine`
 """"""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Affine.png
+    :alt: example of Affine
 .. autoclass:: Affine
     :members:
     :special-members: __call__
@@ -479,6 +598,8 @@ Spatial
 
 `RandAffine`
 """"""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandAffine.png
+    :alt: example of RandAffine
 .. autoclass:: RandAffine
     :members:
     :special-members: __call__
@@ -503,48 +624,64 @@ Spatial
 
 `Rand2DElastic`
 """""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rand2DElastic.png
+    :alt: example of Rand2DElastic
 .. autoclass:: Rand2DElastic
     :members:
     :special-members: __call__
 
 `Rand3DElastic`
 """""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rand3DElastic.png
+    :alt: example of Rand3DElastic
 .. autoclass:: Rand3DElastic
     :members:
     :special-members: __call__
 
 `Rotate90`
 """"""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rotate90.png
+    :alt: example of Rotate90
 .. autoclass:: Rotate90
     :members:
     :special-members: __call__
 
 `RandRotate90`
 """"""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandRotate90.png
+    :alt: example of RandRotate90
 .. autoclass:: RandRotate90
     :members:
     :special-members: __call__
 
 `Flip`
 """"""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Flip.png
+    :alt: example of Flip
 .. autoclass:: Flip
     :members:
     :special-members: __call__
 
 `Resize`
 """"""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Resize.png
+    :alt: example of Resize
 .. autoclass:: Resize
     :members:
     :special-members: __call__
 
 `Rotate`
 """"""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rotate.png
+    :alt: example of Rotate
 .. autoclass:: Rotate
     :members:
     :special-members: __call__
 
 `Zoom`
 """"""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Zoom.png
+    :alt: example of Zoom
 .. autoclass:: Zoom
     :members:
     :special-members: __call__
@@ -711,16 +848,28 @@ Utility
 
 `IntensityStats`
 """"""""""""""""
- .. autoclass:: IntensityStats
-     :members:
-     :special-members: __call__
+.. autoclass:: IntensityStats
+    :members:
+    :special-members: __call__
 
 `ToDevice`
 """"""""""
- .. autoclass:: ToDevice
+.. autoclass:: ToDevice
      :members:
      :special-members: __call__
 
+`CuCIM`
+"""""""
+.. autoclass:: CuCIM
+    :members:
+    :special-members: __call__
+
+`RandCuCIM`
+"""""""""""
+.. autoclass:: RandCuCIM
+    :members:
+    :special-members: __call__
+
 
 Dictionary Transforms
 ---------------------
@@ -730,72 +879,96 @@ Crop and Pad (Dict)
 
 `SpatialPadd`
 """""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/SpatialPadd.png
+    :alt: example of SpatialPadd
 .. autoclass:: SpatialPadd
     :members:
     :special-members: __call__
 
 `BorderPadd`
 """"""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/BorderPadd.png
+    :alt: example of BorderPadd
 .. autoclass:: BorderPadd
     :members:
     :special-members: __call__
 
 `DivisiblePadd`
 """""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/DivisiblePadd.png
+    :alt: example of DivisiblePadd
 .. autoclass:: DivisiblePadd
     :members:
     :special-members: __call__
 
 `SpatialCropd`
 """"""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/SpatialCropd.png
+    :alt: example of SpatialCropd
 .. autoclass:: SpatialCropd
     :members:
     :special-members: __call__
 
 `CenterSpatialCropd`
 """"""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/CenterSpatialCropd.png
+    :alt: example of CenterSpatialCropd
 .. autoclass:: CenterSpatialCropd
     :members:
     :special-members: __call__
 
 `RandSpatialCropd`
 """"""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandSpatialCropd.png
+    :alt: example of RandSpatialCropd
 .. autoclass:: RandSpatialCropd
     :members:
     :special-members: __call__
 
 `RandSpatialCropSamplesd`
 """""""""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandSpatialCropSamplesd.png
+    :alt: example of RandSpatialCropSamplesd
 .. autoclass:: RandSpatialCropSamplesd
     :members:
     :special-members: __call__
 
 `CropForegroundd`
 """""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/CropForegroundd.png
+    :alt: example of CropForegroundd
 .. autoclass:: CropForegroundd
     :members:
     :special-members: __call__
 
 `RandWeightedCropd`
 """""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandWeightedCropd.png
+    :alt: example of RandWeightedCropd
 .. autoclass:: RandWeightedCropd
     :members:
     :special-members: __call__
 
 `RandCropByPosNegLabeld`
 """"""""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCropByPosNegLabeld.png
+    :alt: example of RandCropByPosNegLabeld
 .. autoclass:: RandCropByPosNegLabeld
     :members:
     :special-members: __call__
 
 `RandCropByLabelClassesd`
 """""""""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCropByLabelClassesd.png
+    :alt: example of RandCropByLabelClassesd
 .. autoclass:: RandCropByLabelClassesd
     :members:
     :special-members: __call__
 
 `ResizeWithPadOrCropd`
 """"""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ResizeWithPadOrCropd.png
+    :alt: example of ResizeWithPadOrCropd
 .. autoclass:: ResizeWithPadOrCropd
     :members:
     :special-members: __call__
@@ -808,12 +981,16 @@ Crop and Pad (Dict)
 
 `RandScaleCropd`
 """"""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandScaleCropd.png
+    :alt: example of RandScaleCropd
 .. autoclass:: RandScaleCropd
     :members:
     :special-members: __call__
 
 `CenterScaleCropd`
 """"""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/CenterScaleCropd.png
+    :alt: example of CenterScaleCropd
 .. autoclass:: CenterScaleCropd
     :members:
     :special-members: __call__
@@ -823,159 +1000,219 @@ Intensity (Dict)
 
 `RandGaussianNoised`
 """"""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGaussianNoised.png
+    :alt: example of RandGaussianNoised
 .. autoclass:: RandGaussianNoised
     :members:
     :special-members: __call__
 
 `ShiftIntensityd`
 """""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ShiftIntensityd.png
+    :alt: example of ShiftIntensityd
 .. autoclass:: ShiftIntensityd
     :members:
     :special-members: __call__
 
 `RandShiftIntensityd`
 """""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandShiftIntensityd.png
+    :alt: example of RandShiftIntensityd
 .. autoclass:: RandShiftIntensityd
     :members:
     :special-members: __call__
 
 `StdShiftIntensityd`
 """"""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/StdShiftIntensityd.png
+    :alt: example of StdShiftIntensityd
 .. autoclass:: StdShiftIntensityd
     :members:
     :special-members: __call__
 
 `RandStdShiftIntensityd`
 """"""""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandStdShiftIntensityd.png
+    :alt: example of RandStdShiftIntensityd
 .. autoclass:: RandStdShiftIntensityd
     :members:
     :special-members: __call__
 
 `RandBiasFieldd`
 """"""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandBiasFieldd.png
+    :alt: example of RandBiasFieldd
 .. autoclass:: RandBiasFieldd
     :members:
     :special-members: __call__
 
 `ScaleIntensityd`
 """""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ScaleIntensityd.png
+    :alt: example of ScaleIntensityd
 .. autoclass:: ScaleIntensityd
     :members:
     :special-members: __call__
 
 `RandScaleIntensityd`
 """""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandScaleIntensityd.png
+    :alt: example of RandScaleIntensityd
 .. autoclass:: RandScaleIntensityd
     :members:
     :special-members: __call__
 
 `NormalizeIntensityd`
 """""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/NormalizeIntensityd.png
+    :alt: example of NormalizeIntensityd
 .. autoclass:: NormalizeIntensityd
     :members:
     :special-members: __call__
 
 `ThresholdIntensityd`
 """""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ThresholdIntensityd.png
+    :alt: example of ThresholdIntensityd
 .. autoclass:: ThresholdIntensityd
     :members:
     :special-members: __call__
 
 `ScaleIntensityRanged`
 """"""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ScaleIntensityRanged.png
+    :alt: example of ScaleIntensityRanged
 .. autoclass:: ScaleIntensityRanged
     :members:
     :special-members: __call__
 
 `GibbsNoised`
 """"""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/GibbsNoised.png
+    :alt: example of GibbsNoised
 .. autoclass:: GibbsNoised
     :members:
     :special-members: __call__
 
 `RandGibbsNoised`
 """"""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGibbsNoised.png
+    :alt: example of RandGibbsNoised
 .. autoclass:: RandGibbsNoised
     :members:
     :special-members: __call__
 
 `KSpaceSpikeNoised`
 """"""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/KSpaceSpikeNoised.png
+    :alt: example of KSpaceSpikeNoised
 .. autoclass:: KSpaceSpikeNoised
     :members:
     :special-members: __call__
 
 `RandKSpaceSpikeNoised`
 """""""""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandKSpaceSpikeNoised.png
+    :alt: example of RandKSpaceSpikeNoised
 .. autoclass:: RandKSpaceSpikeNoised
     :members:
     :special-members: __call__
 
 `ScaleIntensityRangePercentilesd`
 """""""""""""""""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/ScaleIntensityRangePercentilesd.png
+    :alt: example of ScaleIntensityRangePercentilesd
 .. autoclass:: ScaleIntensityRangePercentilesd
     :members:
     :special-members: __call__
 
 `AdjustContrastd`
 """""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/AdjustContrastd.png
+    :alt: example of AdjustContrastd
 .. autoclass:: AdjustContrastd
     :members:
     :special-members: __call__
 
 `RandAdjustContrastd`
 """""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandAdjustContrastd.png
+    :alt: example of RandAdjustContrastd
 .. autoclass:: RandAdjustContrastd
     :members:
     :special-members: __call__
 
 `MaskIntensityd`
 """"""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/MaskIntensityd.png
+    :alt: example of MaskIntensityd
 .. autoclass:: MaskIntensityd
     :members:
     :special-members: __call__
 
 `GaussianSmoothd`
 """""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/GaussianSmoothd.png
+    :alt: example of GaussianSmoothd
 .. autoclass:: GaussianSmoothd
     :members:
     :special-members: __call__
 
 `RandGaussianSmoothd`
 """""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGaussianSmoothd.png
+    :alt: example of RandGaussianSmoothd
 .. autoclass:: RandGaussianSmoothd
     :members:
     :special-members: __call__
 
 `GaussianSharpend`
 """"""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/GaussianSharpend.png
+    :alt: example of GaussianSharpend
 .. autoclass:: GaussianSharpend
     :members:
     :special-members: __call__
 
 `RandGaussianSharpend`
 """"""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandGaussianSharpend.png
+    :alt: example of RandGaussianSharpend
 .. autoclass:: RandGaussianSharpend
     :members:
     :special-members: __call__
 
 `RandHistogramShiftd`
 """""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandHistogramShiftd.png
+    :alt: example of RandHistogramShiftd
 .. autoclass:: RandHistogramShiftd
     :members:
     :special-members: __call__
 
 `RandCoarseDropoutd`
 """"""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCoarseDropoutd.png
+    :alt: example of RandCoarseDropoutd
 .. autoclass:: RandCoarseDropoutd
     :members:
     :special-members: __call__
 
+`RandCoarseShuffled`
+""""""""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandCoarseShuffled.png
+    :alt: example of RandCoarseShuffled
+.. autoclass:: RandCoarseShuffled
+    :members:
+    :special-members: __call__
+
 `HistogramNormalized`
 """""""""""""""""""""
- .. autoclass:: HistogramNormalized
-     :members:
-     :special-members: __call__
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/HistogramNormalized.png
+    :alt: example of HistogramNormalized
+.. autoclass:: HistogramNormalized
+    :members:
+    :special-members: __call__
 
 
 IO (Dict)
@@ -1004,6 +1241,8 @@ Post-processing (Dict)
 
 `AsDiscreted`
 """""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/AsDiscreted.png
+    :alt: example of AsDiscreted
 .. autoclass:: AsDiscreted
     :members:
     :special-members: __call__
@@ -1016,6 +1255,8 @@ Post-processing (Dict)
 
 `LabelFilterd`
 """"""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/LabelFilterd.png
+    :alt: example of LabelFilterd
 .. autoclass:: LabelFilterd
     :members:
     :special-members: __call__
@@ -1028,6 +1269,8 @@ Post-processing (Dict)
 
 `LabelToContourd`
 """""""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/LabelToContourd.png
+    :alt: example of LabelToContourd
 .. autoclass:: LabelToContourd
     :members:
     :special-members: __call__
@@ -1067,96 +1310,128 @@ Spatial (Dict)
 
 `Spacingd`
 """"""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Spacingd.png
+    :alt: example of Spacingd
 .. autoclass:: Spacingd
     :members:
     :special-members: __call__
 
 `Orientationd`
 """"""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Orientationd.png
+    :alt: example of Orientationd
 .. autoclass:: Orientationd
     :members:
     :special-members: __call__
 
 `Flipd`
 """""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Flipd.png
+    :alt: example of Flipd
 .. autoclass:: Flipd
     :members:
     :special-members: __call__
 
 `RandFlipd`
 """""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandFlipd.png
+    :alt: example of RandFlipd
 .. autoclass:: RandFlipd
     :members:
     :special-members: __call__
 
 `RandAxisFlipd`
 """""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandAxisFlipd.png
+    :alt: example of RandAxisFlipd
 .. autoclass:: RandAxisFlipd
     :members:
     :special-members: __call__
 
 `Rotated`
 """""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rotated.png
+    :alt: example of Rotated
 .. autoclass:: Rotated
     :members:
     :special-members: __call__
 
 `RandRotated`
 """""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandRotated.png
+    :alt: example of RandRotated
 .. autoclass:: RandRotated
     :members:
     :special-members: __call__
 
 `Zoomd`
 """""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Zoomd.png
+    :alt: example of Zoomd
 .. autoclass:: Zoomd
     :members:
     :special-members: __call__
 
 `RandZoomd`
 """""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandZoomd.png
+    :alt: example of RandZoomd
 .. autoclass:: RandZoomd
     :members:
     :special-members: __call__
 
 `RandRotate90d`
 """""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandRotate90d.png
+    :alt: example of RandRotate90d
 .. autoclass:: RandRotate90d
     :members:
     :special-members: __call__
 
 `Rotate90d`
 """""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rotate90d.png
+    :alt: example of Rotate90d
 .. autoclass:: Rotate90d
     :members:
     :special-members: __call__
 
 `Resized`
 """""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Resized.png
+    :alt: example of Resized
 .. autoclass:: Resized
     :members:
     :special-members: __call__
 
 `Affined`
 """""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Affined.png
+    :alt: example of Affined
 .. autoclass:: Affined
     :members:
     :special-members: __call__
 
 `RandAffined`
 """""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/RandAffined.png
+    :alt: example of RandAffined
 .. autoclass:: RandAffined
     :members:
     :special-members: __call__
 
 `Rand2DElasticd`
 """"""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rand2DElasticd.png
+    :alt: example of Rand2DElasticd
 .. autoclass:: Rand2DElasticd
     :members:
     :special-members: __call__
 
 `Rand3DElasticd`
 """"""""""""""""
+.. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/Rand3DElasticd.png
+    :alt: example of Rand3DElasticd
 .. autoclass:: Rand3DElasticd
     :members:
     :special-members: __call__
@@ -1352,10 +1627,21 @@ Utility (Dict)
 
 `ToDeviced`
 """""""""""
- .. autoclass:: ToDeviced
-     :members:
-     :special-members: __call__
+.. autoclass:: ToDeviced
+    :members:
+    :special-members: __call__
+
+`CuCIMd`
+""""""""
+.. autoclass:: CuCIMd
+    :members:
+    :special-members: __call__
 
+`RandCuCIMd`
+""""""""""""
+.. autoclass:: RandCuCIMd
+    :members:
+    :special-members: __call__
 
 Transform Adaptors
 ------------------
@@ -1377,3 +1663,6 @@ Utilities
 ---------
 .. automodule:: monai.transforms.utils
     :members:
+
+.. automodule:: monai.transforms.utils_pytorch_numpy_unification
+    :members:
diff --git a/docs/source/whatsnew.rst b/docs/source/whatsnew.rst
index daed871e14..e1f118cdf6 100644
--- a/docs/source/whatsnew.rst
+++ b/docs/source/whatsnew.rst
@@ -6,5 +6,6 @@ What's New
 .. toctree::
    :maxdepth: 1
 
+   whatsnew_0_7.md
    whatsnew_0_6.md
    whatsnew_0_5.md
diff --git a/docs/source/whatsnew_0_6.md b/docs/source/whatsnew_0_6.md
index bdc419df37..8df0503142 100644
--- a/docs/source/whatsnew_0_6.md
+++ b/docs/source/whatsnew_0_6.md
@@ -1,4 +1,4 @@
-# What's new in 0.6 🎉🎉
+# What's new in 0.6
 
 - Decollating mini-batches as an essential post-processing step
 - Pythonic APIs to load the pretrained models from Clara Train MMARs
diff --git a/docs/source/whatsnew_0_7.md b/docs/source/whatsnew_0_7.md
new file mode 100644
index 0000000000..748729e94d
--- /dev/null
+++ b/docs/source/whatsnew_0_7.md
@@ -0,0 +1,63 @@
+# What's new in 0.7 🎉🎉
+
+- Performance enhancements with profiling and tuning guides
+- Major usability improvements in `monai.transforms`
+- Reimplementing state-of-the-art Kaggle solutions
+- Vision-language multimodal transformer architectures
+
+## Performance enhancements with profiling and tuning guides
+
+Model training is often a time-consuming step during deep learning development,
+especially for medical imaging applications. Even with powerful hardware (e.g.
+CPU/GPU with large RAM), the workflows often require careful profiling and
+tuning to achieve high performance. MONAI has been focusing on performance
+enhancements, and in this version, a fast model training guide is provided
+to help build highly performant workflows, with a comprehensive overview of
+the profiling tools and practical strategies:
+https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_model_training_guide.md.
+
+The following figure shows the use of [Nvidia Nsight™ Systems](https://developer.nvidia.com/nsight-systems) for system-wide
+performance analysis during a performance enhancement study.
+![nsight_vis](../images/nsight_comparison.png)
+
+With the performance profiling and enhancements, several typical use cases were studied to
+improve the training efficiency.  The following figure shows that fast
+training using MONAI can be `200` times faster than a regular baseline ([learn
+more](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_training_tutorial.ipynb)), and it's `20` times faster than the MONAI v0.6 fast training solution.
+![fast_training](../images/fast_training.png)
+
+## Major usability improvements in `monai.transforms` for NumPy/PyTorch inputs and backends
+
+ MONAI starts to roll out major usability enhancements for the
+ `monai.transforms` module. Many transforms are now supporting both NumPy and
+ PyTorch, as input types and computational backends. To get the supported backends of every transform, please execute: `python monai/transforms/utils.py`.
+
+One benefit of these enhancements is that the users can now better leverage the
+GPUs for preprocessing. By transferring the input data onto GPU using
+`ToTensor` or `EnsureType`, and applying the GPU-based transforms to the data,
+[the tutorial of spleen
+segmentation](https://github.com/Project-MONAI/tutorials/blob/master/acceleration/fast_training_tutorial.ipynb)
+shows the great potential of using the flexible modules for fast and efficient
+training.
+
+## Reimplementing state-of-the-art Kaggle solutions
+
+With this release, we actively evaluate and enhance the quality and flexibility
+of the MONAI core modules, using the public Kaggle challenge as a testbed. [A
+reimplementation](https://github.com/Project-MONAI/tutorials/tree/master/kaggle/RANZCR/4th_place_solution)
+of a state-of-the-art solution at [Kaggle RANZCR CLiP - Catheter and Line
+Position
+Challenge](https://www.kaggle.com/c/ranzcr-clip-catheter-line-classification)
+is made available in this version.
+
+##  Vision-language multimodal transformers
+
+In this release, MONAI adds support for training multimodal (vision + language)
+transformers that can handle both image and textual data. MONAI introduces the
+`TransCheX` model which consists of vision, language, and mixed-modality
+transformer layers for processing chest X-ray and their corresponding
+radiological reports within a unified framework. In addition to `TransCheX`,
+users have the flexibility to alter the architecture by varying the number of
+vision, language and mixed-modality layers and customizing the classification
+head. In addition, the model can be initialized from pre-trained BERT language
+models for fine-tuning.
diff --git a/monai/__init__.py b/monai/__init__.py
index 2c7c920162..5043208b9c 100644
--- a/monai/__init__.py
+++ b/monai/__init__.py
@@ -26,14 +26,14 @@
 
 __basedir__ = os.path.dirname(__file__)
 
-if not (sys.version_info.major == PY_REQUIRED_MAJOR and sys.version_info.minor >= PY_REQUIRED_MINOR):
+if sys.version_info.major != PY_REQUIRED_MAJOR or sys.version_info.minor < PY_REQUIRED_MINOR:
     raise RuntimeError(
         "MONAI requires Python {}.{} or higher. But the current Python is: {}".format(
             PY_REQUIRED_MAJOR, PY_REQUIRED_MINOR, sys.version
         ),
     )
 
-from .utils.module import load_submodules  # noqa: E402
+from .utils.module import load_submodules
 
 # handlers_* have some external decorators the users may not have installed
 # *.so files and folder "_C" may not exist when the cpp extensions are not compiled
diff --git a/monai/_extensions/loader.py b/monai/_extensions/loader.py
index 5f77480ecc..6c68fe08c7 100644
--- a/monai/_extensions/loader.py
+++ b/monai/_extensions/loader.py
@@ -34,7 +34,7 @@ def timeout(time, message):
     except KeyboardInterrupt as e:
         if timer is not None and timer.is_alive():
             raise e  # interrupt from user?
-        raise TimeoutError(message)
+        raise TimeoutError(message) from e
     finally:
         if timer is not None:
             try:
diff --git a/monai/apps/datasets.py b/monai/apps/datasets.py
index c766914026..cff2df3ac7 100644
--- a/monai/apps/datasets.py
+++ b/monai/apps/datasets.py
@@ -111,7 +111,7 @@ def _generate_data_list(self, dataset_dir: str) -> List[Dict]:
             ValueError: When ``section`` is not one of ["training", "validation", "test"].
 
         """
-        class_names = sorted((x for x in os.listdir(dataset_dir) if os.path.isdir(os.path.join(dataset_dir, x))))
+        class_names = sorted(x for x in os.listdir(dataset_dir) if os.path.isdir(os.path.join(dataset_dir, x)))
         self.num_class = len(class_names)
         image_files = [
             [
diff --git a/monai/apps/deepgrow/dataset.py b/monai/apps/deepgrow/dataset.py
index acaeba0bc3..a6bf59d243 100644
--- a/monai/apps/deepgrow/dataset.py
+++ b/monai/apps/deepgrow/dataset.py
@@ -97,7 +97,7 @@ def create_dataset(
         image = os.path.abspath(image)
         label = os.path.abspath(label) if label else None
 
-        logging.info("Image: {}; Label: {}".format(image, label if label else None))
+        logging.info(f"Image: {image}; Label: {label if label else None}")
         data = transforms({image_key: image, label_key: label})
         if dimension == 2:
             data = _save_data_2d(
@@ -154,7 +154,7 @@ def _save_data_2d(vol_idx, vol_image, vol_label, dataset_dir, relative_path):
         if vol_label is not None and np.sum(label) == 0:
             continue
 
-        image_file_prefix = "vol_idx_{:0>4d}_slice_{:0>3d}".format(vol_idx, sid)
+        image_file_prefix = f"vol_idx_{vol_idx:0>4d}_slice_{sid:0>3d}"
         image_file = os.path.join(dataset_dir, "images", image_file_prefix)
         image_file += ".npy"
 
@@ -177,7 +177,7 @@ def _save_data_2d(vol_idx, vol_image, vol_label, dataset_dir, relative_path):
         unique_labels_count = max(unique_labels_count, len(unique_labels))
 
         for idx in unique_labels:
-            label_file_prefix = "{}_region_{:0>2d}".format(image_file_prefix, int(idx))
+            label_file_prefix = f"{image_file_prefix}_region_{int(idx):0>2d}"
             label_file = os.path.join(dataset_dir, "labels", label_file_prefix)
             label_file += ".npy"
 
@@ -226,7 +226,7 @@ def _save_data_3d(vol_idx, vol_image, vol_label, dataset_dir, relative_path):
     label_count = 0
     unique_labels_count = 0
 
-    image_file_prefix = "vol_idx_{:0>4d}".format(vol_idx)
+    image_file_prefix = f"vol_idx_{vol_idx:0>4d}"
     image_file = os.path.join(dataset_dir, "images", image_file_prefix)
     image_file += ".npy"
 
@@ -248,7 +248,7 @@ def _save_data_3d(vol_idx, vol_image, vol_label, dataset_dir, relative_path):
         unique_labels_count = max(unique_labels_count, len(unique_labels))
 
         for idx in unique_labels:
-            label_file_prefix = "{}_region_{:0>2d}".format(image_file_prefix, int(idx))
+            label_file_prefix = f"{image_file_prefix}_region_{int(idx):0>2d}"
             label_file = os.path.join(dataset_dir, "labels", label_file_prefix)
             label_file += ".npy"
 
diff --git a/monai/apps/deepgrow/transforms.py b/monai/apps/deepgrow/transforms.py
index db450792b0..7ededcd94c 100644
--- a/monai/apps/deepgrow/transforms.py
+++ b/monai/apps/deepgrow/transforms.py
@@ -19,7 +19,7 @@
 from monai.transforms import Resize, SpatialCrop
 from monai.transforms.transform import MapTransform, Randomizable, Transform
 from monai.transforms.utils import generate_spatial_bounding_box
-from monai.utils import InterpolateMode, ensure_tuple, ensure_tuple_rep, min_version, optional_import
+from monai.utils import InterpolateMode, deprecated_arg, ensure_tuple, ensure_tuple_rep, min_version, optional_import
 
 measure, _ = optional_import("skimage.measure", "0.14.2", min_version)
 distance_transform_cdt, _ = optional_import("scipy.ndimage.morphology", name="distance_transform_cdt")
@@ -476,7 +476,7 @@ class AddGuidanceFromPointsd(Transform):
         background: key that represents user background (-ve) clicks.
         axis: axis that represents slices in 3D volume. (axis to Depth)
         depth_first: if depth (slices) is positioned at first dimension.
-        dimensions: dimensions based on model used for deepgrow (2D vs 3D).
+        spatial_dims: dimensions based on model used for deepgrow (2D vs 3D).
         slice_key: key that represents applicable slice to add guidance.
         meta_keys: explicitly indicate the key of the meta data dictionary of `ref_image`.
             for example, for data with key `image`, the metadata by default is in `image_meta_dict`.
@@ -486,8 +486,13 @@ class AddGuidanceFromPointsd(Transform):
             to the key data, default is `meta_dict`, the meta data is a dictionary object.
             For example, to handle key `image`,  read/write affine matrices from the
             metadata `image_meta_dict` dictionary's `affine` field.
+
+    .. deprecated:: 0.6.0
+        ``dimensions`` is deprecated, use ``spatial_dims`` instead.
+
     """
 
+    @deprecated_arg(name="dimensions", since="0.6", msg_suffix="Please use `spatial_dims` instead.")
     def __init__(
         self,
         ref_image,
@@ -496,10 +501,11 @@ def __init__(
         background: str = "background",
         axis: int = 0,
         depth_first: bool = True,
-        dimensions: int = 2,
+        spatial_dims: int = 2,
         slice_key: str = "slice",
         meta_keys: Optional[str] = None,
         meta_key_postfix: str = "meta_dict",
+        dimensions: Optional[int] = None,
     ):
         self.ref_image = ref_image
         self.guidance = guidance
@@ -507,7 +513,7 @@ def __init__(
         self.background = background
         self.axis = axis
         self.depth_first = depth_first
-        self.dimensions = dimensions
+        self.dimensions = spatial_dims if dimensions is None else dimensions
         self.slice = slice_key
         self.meta_keys = meta_keys
         self.meta_key_postfix = meta_key_postfix
diff --git a/monai/apps/pathology/data/datasets.py b/monai/apps/pathology/data/datasets.py
index 3694ca4144..d86781f4c5 100644
--- a/monai/apps/pathology/data/datasets.py
+++ b/monai/apps/pathology/data/datasets.py
@@ -64,7 +64,7 @@ def __init__(
         self.patch_size = ensure_tuple_rep(patch_size, 2)
 
         self.image_path_list = list({x["image"] for x in self.data})
-        self.image_reader_name = image_reader_name
+        self.image_reader_name = image_reader_name.lower()
         self.image_reader = WSIReader(image_reader_name)
         self.wsi_object_dict = None
         if self.image_reader_name != "openslide":
@@ -190,7 +190,7 @@ def __init__(
         self.patch_size = ensure_tuple_rep(patch_size, 2)
 
         # set up whole slide image reader
-        self.image_reader_name = image_reader_name
+        self.image_reader_name = image_reader_name.lower()
         self.image_reader = WSIReader(image_reader_name)
 
         # process data and create a list of dictionaries containing all required data and metadata
diff --git a/monai/apps/pathology/handlers/prob_map_producer.py b/monai/apps/pathology/handlers/prob_map_producer.py
index 7ac4a0e45b..469e9d3c25 100644
--- a/monai/apps/pathology/handlers/prob_map_producer.py
+++ b/monai/apps/pathology/handlers/prob_map_producer.py
@@ -62,9 +62,10 @@ def attach(self, engine: Engine) -> None:
             engine: Ignite Engine, it can be a trainer, validator or evaluator.
         """
 
-        self.num_images = len(engine.data_loader.dataset.data)
+        data_loader = engine.data_loader  # type: ignore
+        self.num_images = len(data_loader.dataset.data)
 
-        for sample in engine.data_loader.dataset.data:
+        for sample in data_loader.dataset.data:
             name = sample["name"]
             self.prob_map[name] = np.zeros(sample["mask_shape"], dtype=self.dtype)
             self.counter[name] = len(sample["mask_locations"])
@@ -84,6 +85,8 @@ def __call__(self, engine: Engine) -> None:
         Args:
             engine: Ignite Engine, it can be a trainer, validator or evaluator.
         """
+        if not isinstance(engine.state.batch, dict) or not isinstance(engine.state.output, dict):
+            raise ValueError("engine.state.batch and engine.state.output must be dictionaries.")
         names = engine.state.batch["name"]
         locs = engine.state.batch["mask_location"]
         pred = engine.state.output["pred"]
diff --git a/monai/apps/utils.py b/monai/apps/utils.py
index 36fac955fe..c54184d8a9 100644
--- a/monai/apps/utils.py
+++ b/monai/apps/utils.py
@@ -81,7 +81,7 @@ def update_to(self, b: int = 1, bsize: int = 1, tsize: Optional[int] = None):
             if not has_tqdm and progress:
                 warnings.warn("tqdm is not installed, will not show the downloading progress bar.")
             urlretrieve(url, filepath)
-    except (URLError, HTTPError, ContentTooShortError, IOError) as e:
+    except (URLError, HTTPError, ContentTooShortError, OSError) as e:
         print(f"Download failed from {url} to {filepath}.")
         raise e
 
diff --git a/monai/config/deviceconfig.py b/monai/config/deviceconfig.py
index 273431fc72..db786a88ef 100644
--- a/monai/config/deviceconfig.py
+++ b/monai/config/deviceconfig.py
@@ -73,6 +73,7 @@ def get_optional_config_values():
     output["psutil"] = psutil_version
     output["pandas"] = get_package_version("pandas")
     output["einops"] = get_package_version("einops")
+    output["transformers"] = get_package_version("transformers")
 
     return output
 
@@ -121,7 +122,7 @@ def get_system_info() -> OrderedDict:
     elif output["System"] == "Darwin":
         _dict_append(output, "Mac version", lambda: platform.mac_ver()[0])
     else:
-        with open("/etc/os-release", "r") as rel_f:
+        with open("/etc/os-release") as rel_f:
             linux_ver = re.search(r'PRETTY_NAME="(.*)"', rel_f.read())
         if linux_ver:
             _dict_append(output, "Linux version", lambda: linux_ver.group(1))
diff --git a/monai/data/dataset.py b/monai/data/dataset.py
index c970e83d0d..ae1d88a4fe 100644
--- a/monai/data/dataset.py
+++ b/monai/data/dataset.py
@@ -575,6 +575,7 @@ def __init__(
         cache_rate: float = 1.0,
         num_workers: Optional[int] = None,
         progress: bool = True,
+        copy_cache: bool = True,
     ) -> None:
         """
         Args:
@@ -587,11 +588,16 @@ def __init__(
             num_workers: the number of worker processes to use.
                 If num_workers is None then the number returned by os.cpu_count() is used.
             progress: whether to display a progress bar.
+            copy_cache: whether to `deepcopy` the cache content before applying the random transforms,
+                default to `True`. if the random transforms don't modify the cache content
+                or every cache item is only used once in a `multi-processing` environment,
+                may set `copy=False` for better performance.
         """
         if not isinstance(transform, Compose):
             transform = Compose(transform)
         super().__init__(data=data, transform=transform)
         self.progress = progress
+        self.copy_cache = copy_cache
         self.cache_num = min(int(cache_num), int(len(data) * cache_rate), len(data))
         self.num_workers = num_workers
         if self.num_workers is not None:
@@ -656,7 +662,8 @@ def _transform(self, index: int):
                 # only need to deep copy data on first non-deterministic transform
                 if not start_run:
                     start_run = True
-                    data = deepcopy(data)
+                    if self.copy_cache:
+                        data = deepcopy(data)
                 data = apply_transform(_transform, data)
         return data
 
@@ -722,6 +729,10 @@ class SmartCacheDataset(Randomizable, CacheDataset):
         shuffle: whether to shuffle the whole data list before preparing the cache content for first epoch.
             it will not modify the original input data sequence in-place.
         seed: random seed if shuffle is `True`, default to `0`.
+        copy_cache: whether to `deepcopy` the cache content before applying the random transforms,
+            default to `True`. if the random transforms don't modify the cache content
+            or every cache item is only used once in a `multi-processing` environment,
+            may set `copy=False` for better performance.
     """
 
     def __init__(
@@ -736,6 +747,7 @@ def __init__(
         progress: bool = True,
         shuffle: bool = True,
         seed: int = 0,
+        copy_cache: bool = True,
     ) -> None:
         if shuffle:
             self.set_random_state(seed=seed)
@@ -743,7 +755,7 @@ def __init__(
             self.randomize(data)
         self.shuffle = shuffle
 
-        super().__init__(data, transform, cache_num, cache_rate, num_init_workers, progress)
+        super().__init__(data, transform, cache_num, cache_rate, num_init_workers, progress, copy_cache)
         if self._cache is None:
             self._cache = self._fill_cache()
         if self.cache_num >= len(data):
@@ -977,7 +989,7 @@ def __init__(self, datasets: Sequence, transform: Optional[Callable] = None) ->
         super().__init__(list(datasets), transform=transform)
 
     def __len__(self) -> int:
-        return min((len(dataset) for dataset in self.data))
+        return min(len(dataset) for dataset in self.data)
 
     def _transform(self, index: int):
         def to_list(x):
diff --git a/monai/data/dataset_summary.py b/monai/data/dataset_summary.py
index a8598eb6c8..dfc22f9bc8 100644
--- a/monai/data/dataset_summary.py
+++ b/monai/data/dataset_summary.py
@@ -60,7 +60,7 @@ def __init__(
         self.image_key = image_key
         self.label_key = label_key
         if image_key:
-            self.meta_key = "{}_{}".format(image_key, meta_key_postfix)
+            self.meta_key = f"{image_key}_{meta_key_postfix}"
         self.all_meta_data: List = []
 
     def collect_meta_data(self):
diff --git a/monai/data/grid_dataset.py b/monai/data/grid_dataset.py
index 5b2a4d7abd..5c330f10e4 100644
--- a/monai/data/grid_dataset.py
+++ b/monai/data/grid_dataset.py
@@ -141,7 +141,7 @@ def __iter__(self):
         try:
             iter_end = len(self.dataset)  # TODO: support iterable self.dataset
         except TypeError:
-            raise NotImplementedError("image dataset must implement `len()`.")
+            raise NotImplementedError("image dataset must implement `len()`.") from None
 
         if worker_info is not None:
             # split workload
diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py
index cd1486d6d3..a2ddb74334 100644
--- a/monai/data/image_reader.py
+++ b/monai/data/image_reader.py
@@ -25,21 +25,17 @@
 from .utils import is_supported_format
 
 if TYPE_CHECKING:
-    import cucim
     import itk  # type: ignore
     import nibabel as nib
-    import openslide
     from nibabel.nifti1 import Nifti1Image
     from PIL import Image as PILImage
 
-    has_itk = has_nib = has_pil = has_cim = has_osl = True
+    has_itk = has_nib = has_pil = True
 else:
     itk, has_itk = optional_import("itk", allow_namespace_pkg=True)
     nib, has_nib = optional_import("nibabel")
     Nifti1Image, _ = optional_import("nibabel.nifti1", name="Nifti1Image")
     PILImage, has_pil = optional_import("PIL.Image")
-    cucim, has_cim = optional_import("cucim")
-    openslide, has_osl = optional_import("openslide")
 
 __all__ = ["ImageReader", "ITKReader", "NibabelReader", "NumpyReader", "PILReader", "WSIReader"]
 
@@ -670,11 +666,9 @@ def __init__(self, reader_lib: str = "OpenSlide"):
         super().__init__()
         self.reader_lib = reader_lib.lower()
         if self.reader_lib == "openslide":
-            if has_osl:
-                self.wsi_reader = openslide.OpenSlide
+            self.wsi_reader, *_ = optional_import("openslide", name="OpenSlide")
         elif self.reader_lib == "cucim":
-            if has_cim:
-                self.wsi_reader = cucim.CuImage
+            self.wsi_reader, *_ = optional_import("cucim", name="CuImage")
         else:
             raise ValueError('`reader_lib` should be either "cuCIM" or "OpenSlide"')
 
@@ -697,11 +691,6 @@ def read(self, data: Union[Sequence[str], str, np.ndarray], **kwargs):
             data: file name or a list of file names to read.
 
         """
-        if (self.reader_lib == "openslide") and (not has_osl):
-            raise ImportError("No module named 'openslide'")
-        if (self.reader_lib == "cucim") and (not has_cim):
-            raise ImportError("No module named 'cucim'")
-
         img_: List = []
 
         filenames: Sequence[str] = ensure_tuple(data)
diff --git a/monai/data/nifti_writer.py b/monai/data/nifti_writer.py
index c56d4c1e8d..210321daca 100644
--- a/monai/data/nifti_writer.py
+++ b/monai/data/nifti_writer.py
@@ -15,17 +15,19 @@
 import torch
 
 from monai.config import DtypeLike
+from monai.config.type_definitions import NdarrayOrTensor
 from monai.data.utils import compute_shape_offset, to_affine_nd
 from monai.networks.layers import AffineTransform
 from monai.utils import GridSampleMode, GridSamplePadMode, optional_import
+from monai.utils.type_conversion import convert_data_type
 
 nib, _ = optional_import("nibabel")
 
 
 def write_nifti(
-    data: np.ndarray,
+    data: NdarrayOrTensor,
     file_name: str,
-    affine: Optional[np.ndarray] = None,
+    affine: Optional[NdarrayOrTensor] = None,
     target_affine: Optional[np.ndarray] = None,
     resample: bool = True,
     output_spatial_shape: Union[Sequence[int], np.ndarray, None] = None,
@@ -96,13 +98,17 @@ def write_nifti(
             If None, use the data type of input data.
         output_dtype: data type for saving data. Defaults to ``np.float32``.
     """
+    if isinstance(data, torch.Tensor):
+        data, *_ = convert_data_type(data, np.ndarray)
+    if isinstance(affine, torch.Tensor):
+        affine, *_ = convert_data_type(affine, np.ndarray)
     if not isinstance(data, np.ndarray):
-        raise AssertionError("input data must be numpy array.")
+        raise AssertionError("input data must be numpy array or torch tensor.")
     dtype = dtype or data.dtype
     sr = min(data.ndim, 3)
     if affine is None:
         affine = np.eye(4, dtype=np.float64)
-    affine = to_affine_nd(sr, affine)
+    affine = to_affine_nd(sr, affine)  # type: ignore
 
     if target_affine is None:
         target_affine = affine
@@ -122,7 +128,7 @@ def write_nifti(
     data = nib.orientations.apply_orientation(data, ornt_transform)
     _affine = affine @ nib.orientations.inv_ornt_aff(ornt_transform, data_shape)
     if np.allclose(_affine, target_affine, atol=1e-3) or not resample:
-        results_img = nib.Nifti1Image(data.astype(output_dtype), to_affine_nd(3, _affine))
+        results_img = nib.Nifti1Image(data.astype(output_dtype), to_affine_nd(3, _affine))  # type: ignore
         nib.save(results_img, file_name)
         return
 
@@ -138,7 +144,7 @@ def write_nifti(
         while len(output_spatial_shape_) < 3:
             output_spatial_shape_ = output_spatial_shape_ + [1]
         spatial_shape, channel_shape = data.shape[:3], data.shape[3:]
-        data_np = data.reshape(list(spatial_shape) + [-1])
+        data_np: np.ndarray = data.reshape(list(spatial_shape) + [-1])  # type: ignore
         data_np = np.moveaxis(data_np, -1, 0)  # channel first for pytorch
         data_torch = affine_xform(
             torch.as_tensor(np.ascontiguousarray(data_np).astype(dtype)).unsqueeze(0),
diff --git a/monai/data/png_writer.py b/monai/data/png_writer.py
index 2baec3b872..52163e40ac 100644
--- a/monai/data/png_writer.py
+++ b/monai/data/png_writer.py
@@ -48,7 +48,7 @@ def write_png(
 
     """
     if not isinstance(data, np.ndarray):
-        raise AssertionError("input data must be numpy array.")
+        raise ValueError("input data must be numpy array.")
     if len(data.shape) == 3 and data.shape[2] == 1:  # PIL Image can't save image with 1 channel
         data = data.squeeze(2)
     if output_spatial_shape is not None:
@@ -59,11 +59,11 @@ def write_png(
         _min, _max = np.min(data), np.max(data)
         if len(data.shape) == 3:
             data = np.moveaxis(data, -1, 0)  # to channel first
-            data = xform(data)
+            data = xform(data)  # type: ignore
             data = np.moveaxis(data, 0, -1)
         else:  # (H, W)
             data = np.expand_dims(data, 0)  # make a channel
-            data = xform(data)[0]  # first channel
+            data = xform(data)[0]  # type: ignore
         if mode != InterpolateMode.NEAREST:
             data = np.clip(data, _min, _max)  # type: ignore
 
diff --git a/monai/data/utils.py b/monai/data/utils.py
index aab23217dc..6b577ebdf7 100644
--- a/monai/data/utils.py
+++ b/monai/data/utils.py
@@ -283,7 +283,7 @@ def list_data_collate(batch: Sequence):
                 + "`DataLoader` with `collate_fn=pad_list_data_collate` might solve this problem (check its "
                 + "documentation)."
             )
-        raise RuntimeError(re_str)
+        raise RuntimeError(re_str) from re
     except TypeError as re:
         re_str = str(re)
         if "numpy" in re_str and "Tensor" in re_str:
@@ -294,7 +294,7 @@ def list_data_collate(batch: Sequence):
                 + "creating your `DataLoader` with `collate_fn=pad_list_data_collate` might solve this problem "
                 + "(check its documentation)."
             )
-        raise TypeError(re_str)
+        raise TypeError(re_str) from re
 
 
 def decollate_batch(batch, detach: bool = True):
@@ -1029,7 +1029,7 @@ def json_hashing(item) -> bytes:
     """
     # TODO: Find way to hash transforms content as part of the cache
     cache_key = hashlib.md5(json.dumps(item, sort_keys=True).encode("utf-8")).hexdigest()
-    return f"{cache_key}".encode("utf-8")
+    return f"{cache_key}".encode()
 
 
 def pickle_hashing(item, protocol=pickle.HIGHEST_PROTOCOL) -> bytes:
@@ -1044,7 +1044,7 @@ def pickle_hashing(item, protocol=pickle.HIGHEST_PROTOCOL) -> bytes:
 
     """
     cache_key = hashlib.md5(pickle.dumps(sorted_dict(item), protocol=protocol)).hexdigest()
-    return f"{cache_key}".encode("utf-8")
+    return f"{cache_key}".encode()
 
 
 def sorted_dict(item, key=None, reverse=False):
diff --git a/monai/engines/evaluator.py b/monai/engines/evaluator.py
index 1c37da71d4..bfe9d01e1f 100644
--- a/monai/engines/evaluator.py
+++ b/monai/engines/evaluator.py
@@ -219,7 +219,7 @@ def __init__(
         self.network = network
         self.inferer = SimpleInferer() if inferer is None else inferer
 
-    def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
+    def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]):
         """
         callback function for the Supervised Evaluation processing logic of 1 iteration in Ignite Engine.
         Return below items in a dictionary:
@@ -237,7 +237,7 @@ def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict
         """
         if batchdata is None:
             raise ValueError("Must provide batch data for current iteration.")
-        batch = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking)
+        batch = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking)  # type: ignore
         if len(batch) == 2:
             inputs, targets = batch
             args: Tuple = ()
@@ -246,15 +246,15 @@ def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict
             inputs, targets, args, kwargs = batch
 
         # put iteration outputs into engine.state
-        engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: targets}
+        engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: targets}  # type: ignore
 
         # execute forward computation
         with self.mode(self.network):
             if self.amp:
                 with torch.cuda.amp.autocast():
-                    engine.state.output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs)
+                    engine.state.output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs)  # type: ignore
             else:
-                engine.state.output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs)
+                engine.state.output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs)  # type: ignore
         engine.fire_event(IterationEvents.FORWARD_COMPLETED)
         engine.fire_event(IterationEvents.MODEL_COMPLETED)
 
@@ -349,7 +349,7 @@ def __init__(
         self.pred_keys = ensure_tuple(pred_keys)
         self.inferer = SimpleInferer() if inferer is None else inferer
 
-    def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
+    def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]):
         """
         callback function for the Supervised Evaluation processing logic of 1 iteration in Ignite Engine.
         Return below items in a dictionary:
@@ -370,7 +370,7 @@ def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict
         """
         if batchdata is None:
             raise ValueError("Must provide batch data for current iteration.")
-        batch = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking)
+        batch = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking)  # type: ignore
         if len(batch) == 2:
             inputs, targets = batch
             args: Tuple = ()
@@ -379,17 +379,21 @@ def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]) -> Dict
             inputs, targets, args, kwargs = batch
 
         # put iteration outputs into engine.state
-        engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: targets}
+        engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: targets}  # type: ignore
 
         for idx, network in enumerate(self.networks):
             with self.mode(network):
                 if self.amp:
                     with torch.cuda.amp.autocast():
+                        if isinstance(engine.state.output, dict):
+                            engine.state.output.update(
+                                {self.pred_keys[idx]: self.inferer(inputs, network, *args, **kwargs)}
+                            )
+                else:
+                    if isinstance(engine.state.output, dict):
                         engine.state.output.update(
                             {self.pred_keys[idx]: self.inferer(inputs, network, *args, **kwargs)}
                         )
-                else:
-                    engine.state.output.update({self.pred_keys[idx]: self.inferer(inputs, network, *args, **kwargs)})
         engine.fire_event(IterationEvents.FORWARD_COMPLETED)
         engine.fire_event(IterationEvents.MODEL_COMPLETED)
 
diff --git a/monai/engines/multi_gpu_supervised_trainer.py b/monai/engines/multi_gpu_supervised_trainer.py
index 3671dbcfd1..b6f516ff99 100644
--- a/monai/engines/multi_gpu_supervised_trainer.py
+++ b/monai/engines/multi_gpu_supervised_trainer.py
@@ -59,7 +59,7 @@ def create_multigpu_supervised_trainer(
     prepare_batch: Callable = _prepare_batch,
     output_transform: Callable = _default_transform,
     distributed: bool = False,
-) -> Engine:
+):
     """
     Derived from `create_supervised_trainer` in Ignite.
 
@@ -107,7 +107,7 @@ def create_multigpu_supervised_evaluator(
     prepare_batch: Callable = _prepare_batch,
     output_transform: Callable = _default_eval_transform,
     distributed: bool = False,
-) -> Engine:
+):
     """
     Derived from `create_supervised_evaluator` in Ignite.
 
diff --git a/monai/engines/trainer.py b/monai/engines/trainer.py
index 44e265be1f..eeda143def 100644
--- a/monai/engines/trainer.py
+++ b/monai/engines/trainer.py
@@ -172,7 +172,7 @@ def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]):
         """
         if batchdata is None:
             raise ValueError("Must provide batch data for current iteration.")
-        batch = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking)
+        batch = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking)  # type: ignore
         if len(batch) == 2:
             inputs, targets = batch
             args: Tuple = ()
@@ -180,7 +180,7 @@ def _iteration(self, engine: Engine, batchdata: Dict[str, torch.Tensor]):
         else:
             inputs, targets, args, kwargs = batch
         # put iteration outputs into engine.state
-        engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: targets}
+        engine.state.output = {Keys.IMAGE: inputs, Keys.LABEL: targets}  # type: ignore
 
         def _compute_pred_loss():
             engine.state.output[Keys.PRED] = self.inferer(inputs, self.network, *args, **kwargs)
@@ -198,13 +198,13 @@ def _compute_pred_loss():
         if self.amp and self.scaler is not None:
             with torch.cuda.amp.autocast():
                 _compute_pred_loss()
-            self.scaler.scale(engine.state.output[Keys.LOSS]).backward()
+            self.scaler.scale(engine.state.output[Keys.LOSS]).backward()  # type: ignore
             engine.fire_event(IterationEvents.BACKWARD_COMPLETED)
             self.scaler.step(self.optimizer)
             self.scaler.update()
         else:
             _compute_pred_loss()
-            engine.state.output[Keys.LOSS].backward()
+            engine.state.output[Keys.LOSS].backward()  # type: ignore
             engine.fire_event(IterationEvents.BACKWARD_COMPLETED)
             self.optimizer.step()
         engine.fire_event(IterationEvents.MODEL_COMPLETED)
@@ -345,9 +345,14 @@ def _iteration(
         if batchdata is None:
             raise ValueError("must provide batch data for current iteration.")
 
-        d_input = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking)
+        d_input = self.prepare_batch(batchdata, engine.state.device, engine.non_blocking)  # type: ignore
         batch_size = self.data_loader.batch_size  # type: ignore
-        g_input = self.g_prepare_batch(batch_size, self.latent_shape, engine.state.device, engine.non_blocking)
+        g_input = self.g_prepare_batch(
+            num_latents=batch_size,
+            latent_size=self.latent_shape,
+            device=engine.state.device,  # type: ignore
+            non_blocking=engine.non_blocking,  # type: ignore
+        )
         g_output = self.g_inferer(g_input, self.g_network)
 
         # Train Discriminator
@@ -367,7 +372,12 @@ def _iteration(
 
         # Train Generator
         if self.g_update_latents:
-            g_input = self.g_prepare_batch(batch_size, self.latent_shape, engine.state.device, engine.non_blocking)
+            g_input = self.g_prepare_batch(
+                num_latents=batch_size,
+                latent_size=self.latent_shape,
+                device=engine.state.device,  # type: ignore
+                non_blocking=engine.non_blocking,  # type: ignore
+            )
         g_output = self.g_inferer(g_input, self.g_network)
         if PT_BEFORE_1_7:
             self.g_optimizer.zero_grad()
diff --git a/monai/engines/workflow.py b/monai/engines/workflow.py
index ffb8ce05b3..3454095a02 100644
--- a/monai/engines/workflow.py
+++ b/monai/engines/workflow.py
@@ -152,15 +152,15 @@ def set_sampler_epoch(engine: Engine):
         self.scaler: Optional[torch.cuda.amp.GradScaler] = None
 
         if event_names is None:
-            event_names = [IterationEvents]
+            event_names = [IterationEvents]  # type: ignore
         else:
             if not isinstance(event_names, list):
                 raise ValueError("event_names must be a list or string or EventEnum.")
-            event_names += [IterationEvents]
+            event_names += [IterationEvents]  # type: ignore
         for name in event_names:
             if isinstance(name, str):
                 self.register_events(name, event_to_attr=event_to_attr)
-            elif issubclass(name, EventEnum):
+            elif issubclass(name, EventEnum):  # type: ignore
                 self.register_events(*name, event_to_attr=event_to_attr)
             else:
                 raise ValueError("event_names must be a list or string or EventEnum.")
@@ -187,8 +187,10 @@ def _register_decollate(self):
         def _decollate_data(engine: Engine) -> None:
             # replicate the scalar values to make sure all the items have batch dimension, then decollate
             transform = Decollated(keys=None, detach=True)
-            engine.state.batch = transform(engine.state.batch)
-            engine.state.output = transform(engine.state.output)
+            if isinstance(engine.state.batch, (list, dict)):
+                engine.state.batch = transform(engine.state.batch)
+            if isinstance(engine.state.output, (list, dict)):
+                engine.state.output = transform(engine.state.output)
 
     def _register_postprocessing(self, posttrans: Callable):
         """
@@ -226,12 +228,13 @@ def _register_metrics(self, k_metric: Dict, add_metrics: Optional[Dict] = None):
 
         @self.on(Events.EPOCH_COMPLETED)
         def _compare_metrics(engine: Engine) -> None:
-            if engine.state.key_metric_name is not None:
-                current_val_metric = engine.state.metrics[engine.state.key_metric_name]
-                if self.metric_cmp_fn(current_val_metric, engine.state.best_metric):
-                    self.logger.info(f"Got new best metric of {engine.state.key_metric_name}: {current_val_metric}")
-                    engine.state.best_metric = current_val_metric
-                    engine.state.best_metric_epoch = engine.state.epoch
+            key_metric_name = engine.state.key_metric_name  # type: ignore
+            if key_metric_name is not None:
+                current_val_metric = engine.state.metrics[key_metric_name]
+                if self.metric_cmp_fn(current_val_metric, engine.state.best_metric):  # type: ignore
+                    self.logger.info(f"Got new best metric of {key_metric_name}: {current_val_metric}")
+                    engine.state.best_metric = current_val_metric  # type: ignore
+                    engine.state.best_metric_epoch = engine.state.epoch  # type: ignore
 
     def _register_handlers(self, handlers: Sequence):
         """
diff --git a/monai/handlers/__init__.py b/monai/handlers/__init__.py
index c9eecc6d46..bf1a9d3f89 100644
--- a/monai/handlers/__init__.py
+++ b/monai/handlers/__init__.py
@@ -32,13 +32,5 @@
 from .stats_handler import StatsHandler
 from .surface_distance import SurfaceDistance
 from .tensorboard_handlers import TensorBoardHandler, TensorBoardImageHandler, TensorBoardStatsHandler
-from .transform_inverter import TransformInverter
-from .utils import (
-    evenly_divisible_all_gather,
-    from_engine,
-    stopping_fn_from_loss,
-    stopping_fn_from_metric,
-    string_list_all_gather,
-    write_metrics_reports,
-)
+from .utils import from_engine, stopping_fn_from_loss, stopping_fn_from_metric, write_metrics_reports
 from .validation_handler import ValidationHandler
diff --git a/monai/handlers/checkpoint_loader.py b/monai/handlers/checkpoint_loader.py
index f1f60abf63..7c30584b13 100644
--- a/monai/handlers/checkpoint_loader.py
+++ b/monai/handlers/checkpoint_loader.py
@@ -126,7 +126,7 @@ def __call__(self, engine: Engine) -> None:
         # save current max epochs setting in the engine, don't overwrite it if larger than max_epochs in checkpoint
         prior_max_epochs = engine.state.max_epochs
         Checkpoint.load_objects(to_load=self.load_dict, checkpoint=checkpoint, strict=self.strict)
-        if engine.state.epoch > prior_max_epochs:
+        if prior_max_epochs is not None and engine.state.epoch > prior_max_epochs:
             raise ValueError(
                 f"Epoch count ({engine.state.epoch}) in checkpoint is larger than "
                 f"the `engine.state.max_epochs` ({prior_max_epochs}) of engine. To further train from checkpoint, "
diff --git a/monai/handlers/checkpoint_saver.py b/monai/handlers/checkpoint_saver.py
index f365ff73c4..d5aadadfed 100644
--- a/monai/handlers/checkpoint_saver.py
+++ b/monai/handlers/checkpoint_saver.py
@@ -11,7 +11,7 @@
 
 import logging
 import warnings
-from typing import TYPE_CHECKING, Dict, Optional
+from typing import TYPE_CHECKING, Dict, Mapping, Optional
 
 from monai.config import IgniteInfo
 from monai.utils import min_version, optional_import
@@ -126,7 +126,7 @@ def __init__(self, dirname: str, filename: Optional[str] = None):
                 super().__init__(dirname=dirname, require_empty=False, atomic=False)
                 self.filename = filename
 
-            def __call__(self, checkpoint: Dict, filename: str, metadata: Optional[Dict] = None) -> None:
+            def __call__(self, checkpoint: Mapping, filename: str, metadata: Optional[Mapping] = None) -> None:
                 if self.filename is not None:
                     filename = self.filename
                 super().__call__(checkpoint=checkpoint, filename=filename, metadata=metadata)
@@ -154,8 +154,8 @@ def _final_func(engine: Engine):
             def _score_func(engine: Engine):
                 if isinstance(key_metric_name, str):
                     metric_name = key_metric_name
-                elif hasattr(engine.state, "key_metric_name") and isinstance(engine.state.key_metric_name, str):
-                    metric_name = engine.state.key_metric_name
+                elif hasattr(engine.state, "key_metric_name"):
+                    metric_name = engine.state.key_metric_name  # type: ignore
                 else:
                     raise ValueError(
                         f"Incompatible values: save_key_metric=True and key_metric_name={key_metric_name}."
diff --git a/monai/handlers/decollate_batch.py b/monai/handlers/decollate_batch.py
index 4e99fc6f04..0905ee6ebc 100644
--- a/monai/handlers/decollate_batch.py
+++ b/monai/handlers/decollate_batch.py
@@ -88,7 +88,7 @@ def __call__(self, engine: Engine) -> None:
         Args:
             engine: Ignite Engine, it can be a trainer, validator or evaluator.
         """
-        if self.batch_transform is not None:
+        if self.batch_transform is not None and isinstance(engine.state.batch, (list, dict)):
             engine.state.batch = self.batch_transform(engine.state.batch)
-        if self.output_transform is not None:
+        if self.output_transform is not None and isinstance(engine.state.output, (list, dict)):
             engine.state.output = self.output_transform(engine.state.output)
diff --git a/monai/handlers/garbage_collector.py b/monai/handlers/garbage_collector.py
index fffca2a740..1eb970e795 100644
--- a/monai/handlers/garbage_collector.py
+++ b/monai/handlers/garbage_collector.py
@@ -42,6 +42,7 @@ class GarbageCollector:
     """
 
     def __init__(self, trigger_event: str = "epoch", log_level: int = 10):
+        self.trigger_event: Events
         if isinstance(trigger_event, Events):
             self.trigger_event = trigger_event
         elif trigger_event.lower() == "epoch":
diff --git a/monai/handlers/ignite_metric.py b/monai/handlers/ignite_metric.py
index cbf84e4626..ea7bcd8eee 100644
--- a/monai/handlers/ignite_metric.py
+++ b/monai/handlers/ignite_metric.py
@@ -101,7 +101,7 @@ def compute(self) -> Any:
         if self.save_details:
             if self._engine is None or self._name is None:
                 raise RuntimeError("please call the attach() function to connect expected engine first.")
-            self._engine.state.metric_details[self._name] = self.metric_fn.get_buffer()
+            self._engine.state.metric_details[self._name] = self.metric_fn.get_buffer()  # type: ignore
 
         return result.item() if isinstance(result, torch.Tensor) else result
 
@@ -120,4 +120,4 @@ def attach(self, engine: Engine, name: str) -> None:
         self._engine = engine
         self._name = name
         if self.save_details and not hasattr(engine.state, "metric_details"):
-            engine.state.metric_details = {}
+            engine.state.metric_details = {}  # type: ignore
diff --git a/monai/handlers/metrics_saver.py b/monai/handlers/metrics_saver.py
index 97b080b244..4c722eb35b 100644
--- a/monai/handlers/metrics_saver.py
+++ b/monai/handlers/metrics_saver.py
@@ -132,10 +132,12 @@ def __call__(self, engine: Engine) -> None:
             if self.metrics is not None and len(engine.state.metrics) > 0:
                 _metrics = {k: v for k, v in engine.state.metrics.items() if k in self.metrics or "*" in self.metrics}
             _metric_details = {}
-            if self.metric_details is not None and len(engine.state.metric_details) > 0:
-                for k, v in engine.state.metric_details.items():
-                    if k in self.metric_details or "*" in self.metric_details:
-                        _metric_details[k] = v
+            if hasattr(engine.state, "metric_details"):
+                details = engine.state.metric_details  # type: ignore
+                if self.metric_details is not None and len(details) > 0:
+                    for k, v in details.items():
+                        if k in self.metric_details or "*" in self.metric_details:
+                            _metric_details[k] = v
 
             write_metrics_reports(
                 save_dir=self.save_dir,
diff --git a/monai/handlers/nvtx_handlers.py b/monai/handlers/nvtx_handlers.py
index aba7a7ec0e..847a3c0c47 100644
--- a/monai/handlers/nvtx_handlers.py
+++ b/monai/handlers/nvtx_handlers.py
@@ -97,9 +97,7 @@ def create_paired_events(self, event: str) -> Tuple[Events, Events]:
         )
 
     def get_event(self, event: Union[str, Events]) -> Events:
-        if isinstance(event, str):
-            event = event.upper()
-        return Events[event]
+        return Events[event.upper()] if isinstance(event, str) else event
 
     def attach(self, engine: Engine) -> None:
         """
@@ -126,10 +124,8 @@ class RangePushHandler:
         msg: ASCII message to associate with range
     """
 
-    def __init__(self, event: Events, msg: Optional[str] = None) -> None:
-        if isinstance(event, str):
-            event = event.upper()
-        self.event = Events[event]
+    def __init__(self, event: Union[str, Events], msg: Optional[str] = None) -> None:
+        self.event = Events[event.upper()] if isinstance(event, str) else event
         if msg is None:
             msg = self.event.name
         self.msg = msg
@@ -156,10 +152,8 @@ class RangePopHandler:
         msg: ASCII message to associate with range
     """
 
-    def __init__(self, event: Events) -> None:
-        if isinstance(event, str):
-            event = event.upper()
-        self.event = Events[event]
+    def __init__(self, event: Union[str, Events]) -> None:
+        self.event = Events[event.upper()] if isinstance(event, str) else event
 
     def attach(self, engine: Engine) -> None:
         """
@@ -181,10 +175,8 @@ class MarkHandler:
         msg: ASCII message to associate with range
     """
 
-    def __init__(self, event: Events, msg: Optional[str] = None) -> None:
-        if isinstance(event, str):
-            event = event.upper()
-        self.event = Events[event]
+    def __init__(self, event: Union[str, Events], msg: Optional[str] = None) -> None:
+        self.event = Events[event.upper()] if isinstance(event, str) else event
         if msg is None:
             msg = self.event.name
         self.msg = msg
diff --git a/monai/handlers/segmentation_saver.py b/monai/handlers/segmentation_saver.py
index 535f58945b..27b2cbc039 100644
--- a/monai/handlers/segmentation_saver.py
+++ b/monai/handlers/segmentation_saver.py
@@ -26,7 +26,7 @@
     Engine, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Engine")
 
 
-@deprecated(since="0.6.0", removed="0.7.0", msg_suffix="Please consider using `SaveImage[d]` transform instead.")
+@deprecated(since="0.6.0", removed="0.8.0", msg_suffix="Please consider using `SaveImage[d]` transform instead.")
 class SegmentationSaver:
     """
     Event handler triggered on completing every iteration to save the segmentation predictions into files.
diff --git a/monai/handlers/stats_handler.py b/monai/handlers/stats_handler.py
index d5756074fc..c15abac542 100644
--- a/monai/handlers/stats_handler.py
+++ b/monai/handlers/stats_handler.py
@@ -172,8 +172,9 @@ def _default_epoch_print(self, engine: Engine) -> None:
             and hasattr(engine.state, "best_metric")
             and hasattr(engine.state, "best_metric_epoch")
         ):
-            out_str = f"Key metric: {engine.state.key_metric_name} "
-            out_str += f"best value: {engine.state.best_metric} at epoch: {engine.state.best_metric_epoch}"
+            out_str = f"Key metric: {engine.state.key_metric_name} "  # type: ignore
+            out_str += f"best value: {engine.state.best_metric} "  # type: ignore
+            out_str += f"at epoch: {engine.state.best_metric_epoch}"  # type: ignore
         self.logger.info(out_str)
 
     def _default_iteration_print(self, engine: Engine) -> None:
@@ -220,7 +221,9 @@ def _default_iteration_print(self, engine: Engine) -> None:
             return  # no value to print
 
         num_iterations = engine.state.epoch_length
-        current_iteration = (engine.state.iteration - 1) % num_iterations + 1
+        current_iteration = engine.state.iteration - 1
+        if num_iterations is not None:
+            current_iteration %= num_iterations + 1
         current_epoch = engine.state.epoch
         num_epochs = engine.state.max_epochs
 
diff --git a/monai/handlers/transform_inverter.py b/monai/handlers/transform_inverter.py
deleted file mode 100644
index 83b5f56396..0000000000
--- a/monai/handlers/transform_inverter.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# Copyright 2020 - 2021 MONAI Consortium
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#     http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import warnings
-from typing import TYPE_CHECKING, Callable, Optional, Sequence, Union
-
-import torch
-
-from monai.config import IgniteInfo, KeysCollection
-from monai.engines.utils import CommonKeys, IterationEvents
-from monai.transforms import Invertd, InvertibleTransform
-from monai.utils import deprecated, ensure_tuple, ensure_tuple_rep, min_version, optional_import
-
-Events, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Events")
-if TYPE_CHECKING:
-    from ignite.engine import Engine
-else:
-    Engine, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Engine")
-
-
-@deprecated(since="0.6.0", removed="0.7.0", msg_suffix="Please consider using `Invertd` transform instead.")
-class TransformInverter:
-    """
-    Ignite handler to automatically invert `transforms`.
-    It takes `engine.state.output` as the input data and uses the transforms information from `engine.state.batch`.
-    Expect both `engine.state.output` and `engine.state.batch` to be list of dictionaries data.
-    The inverted data is in-place saved back to `engine.state.output` with key: "{output_key}".
-    And the inverted meta dict will be stored in `engine.state.batch`
-    with key: "{meta_keys}" or "{key}_{meta_key_postfix}".
-
-    .. deprecated:: 0.6.0
-        Use :class:`monai.transforms.Invertd` instead.
-
-    """
-
-    def __init__(
-        self,
-        transform: InvertibleTransform,
-        output_keys: KeysCollection = CommonKeys.PRED,
-        batch_keys: KeysCollection = CommonKeys.IMAGE,
-        meta_keys: Optional[KeysCollection] = None,
-        batch_meta_keys: Optional[KeysCollection] = None,
-        meta_key_postfix: str = "meta_dict",
-        nearest_interp: Union[bool, Sequence[bool]] = True,
-        to_tensor: Union[bool, Sequence[bool]] = True,
-        device: Union[Union[str, torch.device], Sequence[Union[str, torch.device]]] = "cpu",
-        post_func: Union[Callable, Sequence[Callable]] = lambda x: x,
-        num_workers: Optional[int] = 0,
-    ) -> None:
-        """
-        Args:
-            transform: a callable data transform on input data.
-            output_keys: the key of expected data in `ignite.engine.output`, invert transforms on it.
-                it also can be a list of keys, will invert transform for each of them.
-                Default to "pred". it's in-place operation.
-            batch_keys: the key of input data in `ignite.engine.batch`. will get the applied transforms
-                for this input data, then invert them for the expected data with `output_keys`.
-                It can also be a list of keys, each matches to the `output_keys` data. default to "image".
-            meta_keys: explicitly indicate the key for the inverted meta data dictionary.
-                the meta data is a dictionary object which contains: filename, original_shape, etc.
-                it can be a sequence of string, map to the `keys`.
-                if None, will try to construct meta_keys by `{key}_{meta_key_postfix}`.
-            batch_meta_keys: the key of the meta data of input data in `ignite.engine.batch`,
-                will get the `affine`, `data_shape`, etc.
-                the meta data is a dictionary object which contains: filename, original_shape, etc.
-                it can be a sequence of string, map to the `keys`.
-                if None, will try to construct meta_keys by `{orig_key}_{meta_key_postfix}`.
-                meta data will also be inverted and stored in `meta_keys`.
-            meta_key_postfix: if `orig_meta_keys` is None, use `{orig_key}_{meta_key_postfix}` to to fetch the
-                meta data from dict, if `meta_keys` is None, use `{key}_{meta_key_postfix}`.
-                default is `meta_dict`, the meta data is a dictionary object.
-                For example, to handle orig_key `image`,  read/write `affine` matrices from the
-                metadata `image_meta_dict` dictionary's `affine` field.
-                the inverted meta dict will be stored with key: "{key}_{meta_key_postfix}".
-            nearest_interp: whether to use `nearest` interpolation mode when inverting the spatial transforms,
-                default to `True`. If `False`, use the same interpolation mode as the original transform.
-                it also can be a list of bool, each matches to the `output_keys` data.
-            to_tensor: whether to convert the inverted data into PyTorch Tensor first, default to `True`.
-                it also can be a list of bool, each matches to the `output_keys` data.
-            device: if converted to Tensor, move the inverted results to target device before `post_func`,
-                default to "cpu", it also can be a list of string or `torch.device`,
-                each matches to the `output_keys` data.
-            post_func: post processing for the inverted data, should be a callable function.
-                it also can be a list of callable, each matches to the `output_keys` data.
-
-        """
-        self.inverter = Invertd(
-            keys=output_keys,
-            transform=transform,
-            orig_keys=batch_keys,
-            meta_keys=meta_keys,
-            orig_meta_keys=batch_meta_keys,
-            meta_key_postfix=meta_key_postfix,
-            nearest_interp=nearest_interp,
-            to_tensor=to_tensor,
-            device=device,
-            post_func=post_func,
-        )
-        self.output_keys = ensure_tuple(output_keys)
-        self.meta_keys = ensure_tuple_rep(None, len(self.output_keys)) if meta_keys is None else ensure_tuple(meta_keys)
-        if len(self.output_keys) != len(self.meta_keys):
-            raise ValueError("meta_keys should have the same length as output_keys.")
-        self.meta_key_postfix = ensure_tuple_rep(meta_key_postfix, len(self.output_keys))
-
-    def attach(self, engine: Engine) -> None:
-        """
-        Args:
-            engine: Ignite Engine, it can be a trainer, validator or evaluator.
-        """
-        engine.add_event_handler(IterationEvents.MODEL_COMPLETED, self)
-
-    def __call__(self, engine: Engine) -> None:
-        """
-        Args:
-            engine: Ignite Engine, it can be a trainer, validator or evaluator.
-        """
-        if not isinstance(engine.state.batch, list) or not isinstance(engine.state.output, list):
-            warnings.warn("inverter requires `engine.state.batch` and `engine.state.output` to be lists.")
-        else:
-            for i, (b, o) in enumerate(zip(engine.state.batch, engine.state.output)):
-                # combine `batch` and `output` to temporarily act as 1 dict for postprocessing
-                data = dict(b)
-                data.update(o)
-                ret = self.inverter(data)
-
-                for output_key, meta_key, meta_key_postfix in zip(
-                    self.output_keys, self.meta_keys, self.meta_key_postfix
-                ):
-                    # save the inverted data into state.output
-                    engine.state.output[i][output_key] = ret.get(output_key)
-                    # save the inverted meta dict into state.batch
-                    meta_key = meta_key or f"{output_key}_{meta_key_postfix}"
-                    if meta_key in ret:
-                        # FIXME: we save inverted meta dict into `batch` to be compatible with `SegmentationSaver`
-                        # will deprecate both handlers soon
-                        engine.state.batch[i][meta_key] = ret.get(meta_key)
diff --git a/monai/handlers/utils.py b/monai/handlers/utils.py
index 13f23c582a..5d72c028f9 100644
--- a/monai/handlers/utils.py
+++ b/monai/handlers/utils.py
@@ -11,13 +11,13 @@
 
 import os
 from collections import OrderedDict
-from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Union
+from typing import TYPE_CHECKING, Dict, Optional, Sequence, Union
 
 import numpy as np
 import torch
 
 from monai.config import IgniteInfo, KeysCollection
-from monai.utils import deprecated, ensure_tuple, get_torch_version_tuple, look_up_option, min_version, optional_import
+from monai.utils import ensure_tuple, look_up_option, min_version, optional_import
 
 idist, _ = optional_import("ignite", IgniteInfo.OPT_IMPORT_VERSION, min_version, "distributed")
 if TYPE_CHECKING:
@@ -28,8 +28,6 @@
 __all__ = [
     "stopping_fn_from_metric",
     "stopping_fn_from_loss",
-    "evenly_divisible_all_gather",
-    "string_list_all_gather",
     "write_metrics_reports",
     "from_engine",
 ]
@@ -52,83 +50,11 @@ def stopping_fn_from_loss():
     """
 
     def stopping_fn(engine: Engine):
-        return -engine.state.output
+        return -engine.state.output  # type:ignore
 
     return stopping_fn
 
 
-@deprecated(since="0.6.0", removed="0.7.0", msg_suffix="The API had been moved to monai.utils module.")
-def evenly_divisible_all_gather(data: torch.Tensor) -> torch.Tensor:
-    """
-    Utility function for distributed data parallel to pad at first dim to make it evenly divisible and all_gather.
-
-    Args:
-        data: source tensor to pad and execute all_gather in distributed data parallel.
-
-    Note:
-        The input data on different ranks must have exactly same `dtype`.
-
-    .. versionchanged:: 0.6.0
-        The API had been moved to `monai.utils`.
-
-    """
-    if not isinstance(data, torch.Tensor):
-        raise ValueError("input data must be PyTorch Tensor.")
-
-    if idist.get_world_size() <= 1:
-        return data
-
-    # make sure the data is evenly-divisible on multi-GPUs
-    length = data.shape[0]
-    all_lens = idist.all_gather(length)
-    max_len = max(all_lens)
-    if length < max_len:
-        size = [max_len - length] + list(data.shape[1:])
-        data = torch.cat([data, data.new_full(size, 0)], dim=0)
-    # all gather across all processes
-    data = idist.all_gather(data)
-    # delete the padding NaN items
-    return torch.cat([data[i * max_len : i * max_len + l, ...] for i, l in enumerate(all_lens)], dim=0)
-
-
-@deprecated(since="0.6.0", removed="0.7.0", msg_suffix="The API had been moved to monai.utils module.")
-def string_list_all_gather(strings: List[str]) -> List[str]:
-    """
-    Utility function for distributed data parallel to all gather a list of strings.
-    Note that if the item in `strings` is longer than 1024 chars, it will be truncated to 1024:
-    https://pytorch.org/ignite/v0.4.5/distributed.html#ignite.distributed.utils.all_gather.
-
-    Args:
-        strings: a list of strings to all gather.
-
-    .. versionchanged:: 0.6.0
-        The API had been moved to `monai.utils`.
-
-    """
-    world_size = idist.get_world_size()
-    if world_size <= 1:
-        return strings
-
-    result: List[List[str]] = [[] for _ in range(world_size)]
-    # get length of strings
-    length = len(strings)
-    all_lens = idist.all_gather(length)
-    max_len = max(all_lens)
-    # pad the item to make sure the same length
-    if length < max_len:
-        strings += ["" for _ in range(max_len - length)]
-
-    if get_torch_version_tuple() <= (1, 6):
-        raise RuntimeError("string all_gather can not be supported in PyTorch < 1.7.0.")
-
-    for s in strings:
-        gathered = idist.all_gather(s)
-        for i, g in enumerate(gathered):
-            if len(g) > 0:
-                result[i].append(g)
-    return [i for k in result for i in k]
-
-
 def write_metrics_reports(
     save_dir: str,
     images: Optional[Sequence[str]],
@@ -204,12 +130,12 @@ class    mean    median    max    5percentile 95percentile  notnans
             if summary_ops is not None:
                 supported_ops = OrderedDict(
                     {
-                        "mean": lambda x: np.nanmean(x),
-                        "median": lambda x: np.nanmedian(x),
-                        "max": lambda x: np.nanmax(x),
-                        "min": lambda x: np.nanmin(x),
+                        "mean": np.nanmean,
+                        "median": np.nanmedian,
+                        "max": np.nanmax,
+                        "min": np.nanmin,
                         "90percentile": lambda x: np.nanpercentile(x[0], x[1]),
-                        "std": lambda x: np.nanstd(x),
+                        "std": np.nanstd,
                         "notnans": lambda x: (~np.isnan(x)).sum(),
                     }
                 )
@@ -223,7 +149,7 @@ def _compute_op(op: str, d: np.ndarray):
                         return c_op(d)
 
                     threshold = int(op.split("percentile")[0])
-                    return supported_ops["90percentile"]((d, threshold))
+                    return supported_ops["90percentile"]((d, threshold))  # type: ignore
 
                 with open(os.path.join(save_dir, f"{k}_summary.csv"), "w") as f:
                     f.write(f"class{deli}{deli.join(ops)}\n")
diff --git a/monai/losses/deform.py b/monai/losses/deform.py
index d96fa1440a..b9eb15f798 100644
--- a/monai/losses/deform.py
+++ b/monai/losses/deform.py
@@ -65,7 +65,7 @@ def __init__(
                 - ``"mean"``: the sum of the output will be divided by the number of elements in the output.
                 - ``"sum"``: the output will be summed.
         """
-        super(BendingEnergyLoss, self).__init__(reduction=LossReduction(reduction).value)
+        super().__init__(reduction=LossReduction(reduction).value)
 
     def forward(self, pred: torch.Tensor) -> torch.Tensor:
         """
diff --git a/monai/losses/dice.py b/monai/losses/dice.py
index 325c5300ea..6bf9680bca 100644
--- a/monai/losses/dice.py
+++ b/monai/losses/dice.py
@@ -419,7 +419,7 @@ def __init__(
                 wass_loss(pred_score, grnd)  # 0
 
         """
-        super(GeneralizedWassersteinDiceLoss, self).__init__(reduction=LossReduction(reduction).value)
+        super().__init__(reduction=LossReduction(reduction).value)
 
         if dist_matrix.shape[0] != dist_matrix.shape[1]:
             raise ValueError(f"dist_matrix must be C x C, got {dist_matrix.shape[0]} x {dist_matrix.shape[1]}.")
diff --git a/monai/losses/focal_loss.py b/monai/losses/focal_loss.py
index b4b3698e5b..157ce9fd01 100644
--- a/monai/losses/focal_loss.py
+++ b/monai/losses/focal_loss.py
@@ -67,7 +67,7 @@ def __init__(
                 fl(pred, grnd)
 
         """
-        super(FocalLoss, self).__init__(reduction=LossReduction(reduction).value)
+        super().__init__(reduction=LossReduction(reduction).value)
         self.include_background = include_background
         self.to_onehot_y = to_onehot_y
         self.gamma = gamma
diff --git a/monai/losses/image_dissimilarity.py b/monai/losses/image_dissimilarity.py
index eed5808aa3..78f92303fc 100644
--- a/monai/losses/image_dissimilarity.py
+++ b/monai/losses/image_dissimilarity.py
@@ -8,14 +8,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Tuple, Union
+from typing import Optional, Tuple, Union
 
 import torch
 from torch.nn import functional as F
 from torch.nn.modules.loss import _Loss
 
 from monai.networks.layers import gaussian_1d, separable_filtering
-from monai.utils import LossReduction
+from monai.utils import LossReduction, deprecated_arg
+from monai.utils.module import look_up_option
 
 
 def make_rectangular_kernel(kernel_size: int) -> torch.Tensor:
@@ -59,18 +60,20 @@ class LocalNormalizedCrossCorrelationLoss(_Loss):
         DeepReg (https://github.com/DeepRegNet/DeepReg)
     """
 
+    @deprecated_arg(name="ndim", since="0.6", msg_suffix="Please use `spatial_dims` instead.")
     def __init__(
         self,
-        ndim: int = 3,
+        spatial_dims: int = 3,
         kernel_size: int = 3,
         kernel_type: str = "rectangular",
         reduction: Union[LossReduction, str] = LossReduction.MEAN,
         smooth_nr: float = 1e-5,
         smooth_dr: float = 1e-5,
+        ndim: Optional[int] = None,
     ) -> None:
         """
         Args:
-            ndim: number of spatial ndimensions, {``1``, ``2``, ``3``}. Defaults to 3.
+            spatial_dims: number of spatial ndimensions, {``1``, ``2``, ``3``}. Defaults to 3.
             kernel_size: kernel spatial size, must be odd.
             kernel_type: {``"rectangular"``, ``"triangular"``, ``"gaussian"``}. Defaults to ``"rectangular"``.
             reduction: {``"none"``, ``"mean"``, ``"sum"``}
@@ -81,22 +84,24 @@ def __init__(
                 - ``"sum"``: the output will be summed.
             smooth_nr: a small constant added to the numerator to avoid nan.
             smooth_dr: a small constant added to the denominator to avoid nan.
+
+        .. deprecated:: 0.6.0
+            ``ndim`` is deprecated, use ``spatial_dims``.
         """
-        super(LocalNormalizedCrossCorrelationLoss, self).__init__(reduction=LossReduction(reduction).value)
+        super().__init__(reduction=LossReduction(reduction).value)
 
-        self.ndim = ndim
-        if self.ndim not in [1, 2, 3]:
+        if ndim is not None:
+            spatial_dims = ndim
+        self.ndim = spatial_dims
+        if self.ndim not in {1, 2, 3}:
             raise ValueError(f"Unsupported ndim: {self.ndim}-d, only 1-d, 2-d, and 3-d inputs are supported")
 
         self.kernel_size = kernel_size
         if self.kernel_size % 2 == 0:
             raise ValueError(f"kernel_size must be odd, got {self.kernel_size}")
 
-        if kernel_type not in kernel_dict.keys():
-            raise ValueError(
-                f'Unsupported kernel_type: {kernel_type}, available options are ["rectangular", "triangular", "gaussian"].'
-            )
-        self.kernel = kernel_dict[kernel_type](self.kernel_size)
+        _kernel = look_up_option(kernel_type, kernel_dict)
+        self.kernel = _kernel(self.kernel_size)
         self.kernel_vol = self.get_kernel_vol()
 
         self.smooth_nr = float(smooth_nr)
@@ -170,6 +175,7 @@ class GlobalMutualInformationLoss(_Loss):
 
     def __init__(
         self,
+        kernel_type: str = "gaussian",
         num_bins: int = 23,
         sigma_ratio: float = 0.5,
         reduction: Union[LossReduction, str] = LossReduction.MEAN,
@@ -178,6 +184,19 @@ def __init__(
     ) -> None:
         """
         Args:
+            kernel_type: {``"gaussian"``, ``"b-spline"``}
+                ``"gaussian"``: adapted from DeepReg
+                Reference: https://dspace.mit.edu/handle/1721.1/123142, Section 3.1, equation 3.1-3.5, Algorithm 1.
+                ``"b-spline"``: based on the method of Mattes et al [1,2] and adapted from ITK
+                References:
+                  [1] "Nonrigid multimodality image registration"
+                      D. Mattes, D. R. Haynor, H. Vesselle, T. Lewellen and W. Eubank
+                      Medical Imaging 2001: Image Processing, 2001, pp. 1609-1620.
+                  [2] "PET-CT Image Registration in the Chest Using Free-form Deformations"
+                      D. Mattes, D. R. Haynor, H. Vesselle, T. Lewellen and W. Eubank
+                      IEEE Transactions in Medical Imaging. Vol.22, No.1,
+                      January 2003. pp.120-128.
+
             num_bins: number of bins for intensity
             sigma_ratio: a hyper param for gaussian function
             reduction: {``"none"``, ``"mean"``, ``"sum"``}
@@ -189,25 +208,99 @@ def __init__(
             smooth_nr: a small constant added to the numerator to avoid nan.
             smooth_dr: a small constant added to the denominator to avoid nan.
         """
-        super(GlobalMutualInformationLoss, self).__init__(reduction=LossReduction(reduction).value)
+        super().__init__(reduction=LossReduction(reduction).value)
         if num_bins <= 0:
             raise ValueError("num_bins must > 0, got {num_bins}")
         bin_centers = torch.linspace(0.0, 1.0, num_bins)  # (num_bins,)
         sigma = torch.mean(bin_centers[1:] - bin_centers[:-1]) * sigma_ratio
-        self.preterm = 1 / (2 * sigma ** 2)
-        self.bin_centers = bin_centers[None, None, ...]
+        self.kernel_type = look_up_option(kernel_type, ["gaussian", "b-spline"])
+        self.num_bins = num_bins
+        self.kernel_type = kernel_type
+        if self.kernel_type == "gaussian":
+            self.preterm = 1 / (2 * sigma ** 2)
+            self.bin_centers = bin_centers[None, None, ...]
         self.smooth_nr = float(smooth_nr)
         self.smooth_dr = float(smooth_dr)
 
-    def parzen_windowing(self, pred: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+    def parzen_windowing(
+        self, pred: torch.Tensor, target: torch.Tensor
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        if self.kernel_type == "gaussian":
+            pred_weight, pred_probability = self.parzen_windowing_gaussian(pred)
+            target_weight, target_probability = self.parzen_windowing_gaussian(target)
+        elif self.kernel_type == "b-spline":
+            # a third order BSpline kernel is used for the pred image intensity PDF.
+            pred_weight, pred_probability = self.parzen_windowing_b_spline(pred, order=3)
+            # a zero order (box car) BSpline kernel is used for the target image intensity PDF.
+            target_weight, target_probability = self.parzen_windowing_b_spline(target, order=0)
+        else:
+            raise ValueError
+        return pred_weight, pred_probability, target_weight, target_probability
+
+    def parzen_windowing_b_spline(self, img: torch.Tensor, order: int) -> Tuple[torch.Tensor, torch.Tensor]:
         """
+        Parzen windowing with b-spline kernel (adapted from ITK)
+
         Args:
-            pred: the shape should be B[NDHW].
+            img: the shape should be B[NDHW].
+            order: int.
+        """
+
+        # Compute binsize for the histograms.
+        #
+        # The binsize for the image intensities needs to be adjusted so that
+        # we can avoid dealing with boundary conditions using the cubic
+        # spline as the Parzen window.  We do this by increasing the size
+        # of the bins so that the joint histogram becomes "padded" at the
+        # borders. Because we are changing the binsize,
+        # we also need to shift the minimum by the padded amount in order to
+        # avoid minimum values filling in our padded region.
+        #
+        # Note that there can still be non-zero bin values in the padded region,
+        # it's just that these bins will never be a central bin for the Parzen
+        # window.
+        _max, _min = torch.max(img), torch.min(img)
+        padding = 2
+        bin_size = (_max - _min) / (self.num_bins - 2 * padding)
+        norm_min = torch.div(_min, bin_size, rounding_mode="floor") - padding
+
+        # assign bin/window index to each voxel
+        window_term = torch.div(img, bin_size) - norm_min  # B[NDHW]
+        # make sure the extreme values are in valid (non-padded) bins
+        window_term = torch.clamp(window_term, padding, self.num_bins - padding - 1)  # B[NDHW]
+        window_term = window_term.reshape(window_term.shape[0], -1, 1)  # (batch, num_sample, 1)
+        bins = torch.arange(self.num_bins, device=window_term.device).reshape(1, 1, -1)  # (1, 1, num_bins)
+        sample_bin_matrix = torch.abs(bins - window_term)  # (batch, num_sample, num_bins)
+
+        # b-spleen kernel
+        # (4 - 6 * abs ** 2 + 3 * abs ** 3) / 6 when 0 <= abs < 1
+        # (2 - abs) ** 3 / 6 when 1 <= abs < 2
+        weight = torch.zeros_like(sample_bin_matrix, dtype=torch.float)  # (batch, num_sample, num_bins)
+        if order == 0:
+            weight = weight + (sample_bin_matrix < 0.5) + (sample_bin_matrix == 0.5) * 0.5
+        elif order == 3:
+            weight = (
+                weight + (4 - 6 * sample_bin_matrix ** 2 + 3 * sample_bin_matrix ** 3) * (sample_bin_matrix < 1) / 6
+            )
+            weight = weight + (2 - sample_bin_matrix) ** 3 * (sample_bin_matrix >= 1) * (sample_bin_matrix < 2) / 6
+        else:
+            raise ValueError(f"Do not support b-spline {order}-order parzen windowing")
+
+        weight = weight / torch.sum(weight, dim=-1, keepdim=True)  # (batch, num_sample, num_bins)
+        probability = torch.mean(weight, dim=-2, keepdim=True)  # (batch, 1, num_bins)
+        return weight, probability
+
+    def parzen_windowing_gaussian(self, img: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+        """
+        Parzen windowing with gaussian kernel (adapted from DeepReg implementation)
+        Note: the input is expected to range between 0 and 1
+        Args:
+            img: the shape should be B[NDHW].
         """
-        pred = torch.clamp(pred, 0, 1)
-        pred = pred.reshape(pred.shape[0], -1, 1)  # (batch, num_sample, 1)
+        img = torch.clamp(img, 0, 1)
+        img = img.reshape(img.shape[0], -1, 1)  # (batch, num_sample, 1)
         weight = torch.exp(
-            -self.preterm.to(pred) * (pred - self.bin_centers.to(pred)) ** 2
+            -self.preterm.to(img) * (img - self.bin_centers.to(img)) ** 2
         )  # (batch, num_sample, num_bin)
         weight = weight / torch.sum(weight, dim=-1, keepdim=True)  # (batch, num_sample, num_bin)
         probability = torch.mean(weight, dim=-2, keepdim=True)  # (batch, 1, num_bin)
@@ -223,11 +316,10 @@ def forward(self, pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
         """
         if target.shape != pred.shape:
             raise ValueError(f"ground truth has differing shape ({target.shape}) from pred ({pred.shape})")
-        wa, pa = self.parzen_windowing(pred)  # (batch, num_sample, num_bin), (batch, 1, num_bin)
-        wb, pb = self.parzen_windowing(target)  # (batch, num_sample, num_bin), (batch, 1, num_bin)
-        pab = torch.bmm(wa.permute(0, 2, 1), wb).div(wa.shape[1])  # (batch, num_bins, num_bins)
+        wa, pa, wb, pb = self.parzen_windowing(pred, target)  # (batch, num_sample, num_bin), (batch, 1, num_bin)
 
-        papb = torch.bmm(pa.permute(0, 2, 1), pb)  # (batch, num_bins, num_bins)
+        pab = torch.bmm(wa.permute(0, 2, 1), wb.to(wa)).div(wa.shape[1])  # (batch, num_bins, num_bins)
+        papb = torch.bmm(pa.permute(0, 2, 1), pb.to(pa))  # (batch, num_bins, num_bins)
         mi = torch.sum(
             pab * torch.log((pab + self.smooth_nr) / (papb + self.smooth_dr) + self.smooth_dr), dim=(1, 2)
         )  # (batch)
diff --git a/monai/losses/multi_scale.py b/monai/losses/multi_scale.py
index 6f9326420b..0e60cda362 100644
--- a/monai/losses/multi_scale.py
+++ b/monai/losses/multi_scale.py
@@ -67,7 +67,7 @@ def __init__(
             scales: list of scalars or None, if None, do not apply any scaling.
             kernel: gaussian or cauchy.
         """
-        super(MultiScaleLoss, self).__init__(reduction=LossReduction(reduction).value)
+        super().__init__(reduction=LossReduction(reduction).value)
         if kernel not in kernel_fn_dict.keys():
             raise ValueError(f"got unsupported kernel type: {kernel}", "only support gaussian and cauchy")
         self.kernel_fn = kernel_fn_dict[kernel]
diff --git a/monai/metrics/regression.py b/monai/metrics/regression.py
index a2a2f0853d..2bf565f543 100644
--- a/monai/metrics/regression.py
+++ b/monai/metrics/regression.py
@@ -57,9 +57,7 @@ def aggregate(self):  # type: ignore
 
     def _check_shape(self, y_pred: torch.Tensor, y: torch.Tensor) -> None:
         if y_pred.shape != y.shape:
-            raise ValueError(
-                "y_pred and y shapes dont match, received y_pred: [{}] and y: [{}]".format(y_pred.shape, y.shape)
-            )
+            raise ValueError(f"y_pred and y shapes dont match, received y_pred: [{y_pred.shape}] and y: [{y.shape}]")
 
         # also check if there is atleast one non-batch dimension i.e. num_dims >= 2
         if len(y_pred.shape) < 2:
diff --git a/monai/metrics/rocauc.py b/monai/metrics/rocauc.py
index c2679cc2ea..d331f01707 100644
--- a/monai/metrics/rocauc.py
+++ b/monai/metrics/rocauc.py
@@ -104,9 +104,11 @@ def compute_roc_auc(
 
     Args:
         y_pred: input data to compute, typical classification model output.
-            it must be One-Hot format and first dim is batch, example shape: [16] or [16, 2].
-        y: ground truth to compute ROC AUC metric, the first dim is batch.
-            example shape: [16, 1] will be converted into [16, 2] (where `2` is inferred from `y_pred`).
+            the first dim must be batch, if multi-classes, it must be in One-Hot format.
+            for example: shape `[16]` or `[16, 1]` for a binary data, shape `[16, 2]` for 2 classes data.
+        y: ground truth to compute ROC AUC metric, the first dim must be batch.
+            if multi-classes, it must be in One-Hot format.
+            for example: shape `[16]` or `[16, 1]` for a binary data, shape `[16, 2]` for 2 classes data.
         average: {``"macro"``, ``"weighted"``, ``"micro"``, ``"none"``}
             Type of averaging performed if not binary classification.
             Defaults to ``"macro"``.
diff --git a/monai/networks/blocks/activation.py b/monai/networks/blocks/activation.py
index a380f8e757..b136eb7f1f 100644
--- a/monai/networks/blocks/activation.py
+++ b/monai/networks/blocks/activation.py
@@ -48,8 +48,7 @@ class Swish(nn.Module):
 
 
     Shape:
-        - Input: :math:`(N, *)` where `*` means, any number of additional
-          dimensions
+        - Input: :math:`(N, *)` where `*` means, any number of additional dimensions
         - Output: :math:`(N, *)`, same shape as the input
 
 
@@ -123,7 +122,7 @@ class MemoryEfficientSwish(nn.Module):
     """
 
     def __init__(self, inplace: bool = False):
-        super(MemoryEfficientSwish, self).__init__()
+        super().__init__()
         # inplace only works when using torch.nn.functional.silu
         self.inplace = inplace
 
@@ -143,8 +142,7 @@ class Mish(nn.Module):
     this class will utilize `torch.nn.functional.mish` to do the calculation if meets the version.
 
     Shape:
-        - Input: :math:`(N, *)` where `*` means, any number of additional
-          dimensions
+        - Input: :math:`(N, *)` where `*` means, any number of additional dimensions
         - Output: :math:`(N, *)`, same shape as the input
 
 
@@ -158,7 +156,7 @@ class Mish(nn.Module):
     """
 
     def __init__(self, inplace: bool = False):
-        super(Mish, self).__init__()
+        super().__init__()
         # inplace only works when using torch.nn.functional.mish
         self.inplace = inplace
 
diff --git a/monai/networks/blocks/aspp.py b/monai/networks/blocks/aspp.py
index f8bf8a5ba6..9796ea8148 100644
--- a/monai/networks/blocks/aspp.py
+++ b/monai/networks/blocks/aspp.py
@@ -86,7 +86,7 @@ def __init__(
 
         out_channels = conv_out_channels * len(pads)  # final conv. output channels
         self.conv_k1 = Convolution(
-            dimensions=spatial_dims,
+            spatial_dims=spatial_dims,
             in_channels=out_channels,
             out_channels=out_channels,
             kernel_size=1,
diff --git a/monai/networks/blocks/convolutions.py b/monai/networks/blocks/convolutions.py
index 39ce60e3f8..d9ef0fd4a1 100644
--- a/monai/networks/blocks/convolutions.py
+++ b/monai/networks/blocks/convolutions.py
@@ -18,6 +18,7 @@
 from monai.networks.blocks import ADN
 from monai.networks.layers.convutils import same_padding, stride_minus_kernel_padding
 from monai.networks.layers.factories import Conv
+from monai.utils.deprecated import deprecated_arg
 
 
 class Convolution(nn.Sequential):
@@ -59,7 +60,7 @@ class Convolution(nn.Sequential):
         )
 
     Args:
-        dimensions: number of spatial dimensions.
+        spatial_dims: number of spatial dimensions.
         in_channels: number of input channels.
         out_channels: number of output channels.
         strides: convolution stride. Defaults to 1.
@@ -69,13 +70,13 @@ class Convolution(nn.Sequential):
         act: activation type and arguments. Defaults to PReLU.
         norm: feature normalization type and arguments. Defaults to instance norm.
         dropout: dropout ratio. Defaults to no dropout.
-        dropout_dim: determine the dimensions of dropout. Defaults to 1.
+        dropout_dim: determine the spatial dimensions of dropout. Defaults to 1.
 
             - When dropout_dim = 1, randomly zeroes some of the elements for each channel.
             - When dropout_dim = 2, Randomly zeroes out entire channels (a channel is a 2D feature map).
             - When dropout_dim = 3, Randomly zeroes out entire channels (a channel is a 3D feature map).
 
-            The value of dropout_dim should be no no larger than the value of `dimensions`.
+            The value of dropout_dim should be no no larger than the value of `spatial_dims`.
         dilation: dilation rate. Defaults to 1.
         groups: controls the connections between inputs and outputs. Defaults to 1.
         bias: whether to have a bias term. Defaults to True.
@@ -86,6 +87,9 @@ class Convolution(nn.Sequential):
         output_padding: controls the additional size added to one side of the output shape.
             Defaults to None.
 
+    .. deprecated:: 0.6.0
+        ``dimensions`` is deprecated, use ``spatial_dims`` instead.
+
     See also:
 
         :py:class:`monai.networks.layers.Conv`
@@ -93,9 +97,12 @@ class Convolution(nn.Sequential):
 
     """
 
+    @deprecated_arg(
+        name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead."
+    )
     def __init__(
         self,
-        dimensions: int,
+        spatial_dims: int,
         in_channels: int,
         out_channels: int,
         strides: Union[Sequence[int], int] = 1,
@@ -112,15 +119,16 @@ def __init__(
         is_transposed: bool = False,
         padding: Optional[Union[Sequence[int], int]] = None,
         output_padding: Optional[Union[Sequence[int], int]] = None,
+        dimensions: Optional[int] = None,
     ) -> None:
         super().__init__()
-        self.dimensions = dimensions
+        self.dimensions = spatial_dims if dimensions is None else dimensions
         self.in_channels = in_channels
         self.out_channels = out_channels
         self.is_transposed = is_transposed
         if padding is None:
             padding = same_padding(kernel_size, dilation)
-        conv_type = Conv[Conv.CONVTRANS if is_transposed else Conv.CONV, dimensions]
+        conv_type = Conv[Conv.CONVTRANS if is_transposed else Conv.CONV, self.dimensions]
 
         conv: nn.Module
         if is_transposed:
@@ -159,7 +167,7 @@ def __init__(
                     in_channels=out_channels,
                     act=act,
                     norm=norm,
-                    norm_dim=dimensions,
+                    norm_dim=self.dimensions,
                     dropout=dropout,
                     dropout_dim=dropout_dim,
                 ),
@@ -177,7 +185,7 @@ class ResidualUnit(nn.Module):
         from monai.networks.blocks import ResidualUnit
 
         convs = ResidualUnit(
-            dimensions=3,
+            spatial_dims=3,
             in_channels=1,
             out_channels=1,
             adn_ordering="AN",
@@ -209,7 +217,7 @@ class ResidualUnit(nn.Module):
         )
 
     Args:
-        dimensions: number of spatial dimensions.
+        spatial_dims: number of spatial dimensions.
         in_channels: number of input channels.
         out_channels: number of output channels.
         strides: convolution stride. Defaults to 1.
@@ -234,15 +242,19 @@ class ResidualUnit(nn.Module):
         padding: controls the amount of implicit zero-paddings on both sides for padding number of points
             for each dimension. Defaults to None.
 
+    .. deprecated:: 0.6.0
+        ``dimensions`` is deprecated, use ``spatial_dims`` instead.
+
     See also:
 
         :py:class:`monai.networks.blocks.Convolution`
 
     """
 
+    @deprecated_arg(name="dimensions", since="0.6", msg_suffix="Please use `spatial_dims` instead.")
     def __init__(
         self,
-        dimensions: int,
+        spatial_dims: int,
         in_channels: int,
         out_channels: int,
         strides: Union[Sequence[int], int] = 1,
@@ -257,9 +269,10 @@ def __init__(
         bias: bool = True,
         last_conv_only: bool = False,
         padding: Optional[Union[Sequence[int], int]] = None,
+        dimensions: Optional[int] = None,
     ) -> None:
         super().__init__()
-        self.dimensions = dimensions
+        self.dimensions = spatial_dims if dimensions is None else dimensions
         self.in_channels = in_channels
         self.out_channels = out_channels
         self.conv = nn.Sequential()
@@ -273,7 +286,7 @@ def __init__(
         for su in range(subunits):
             conv_only = last_conv_only and su == (subunits - 1)
             unit = Convolution(
-                dimensions,
+                self.dimensions,
                 schannels,
                 out_channels,
                 strides=sstrides,
@@ -304,7 +317,7 @@ def __init__(
                 rkernel_size = 1
                 rpadding = 0
 
-            conv_type = Conv[Conv.CONV, dimensions]
+            conv_type = Conv[Conv.CONV, self.dimensions]
             self.residual = conv_type(in_channels, out_channels, rkernel_size, strides, rpadding, bias=bias)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
diff --git a/monai/networks/blocks/crf.py b/monai/networks/blocks/crf.py
index 49ff5bcd04..21da3bb74f 100644
--- a/monai/networks/blocks/crf.py
+++ b/monai/networks/blocks/crf.py
@@ -57,7 +57,7 @@ def __init__(
             compatibility_matrix: a matrix describing class compatibility,
                 should be NxN where N is the number of classes.
         """
-        super(CRF, self).__init__()
+        super().__init__()
         self.iterations = iterations
         self.bilateral_weight = bilateral_weight
         self.gaussian_weight = gaussian_weight
diff --git a/monai/networks/blocks/dynunet_block.py b/monai/networks/blocks/dynunet_block.py
index bb654d841c..24fb16018e 100644
--- a/monai/networks/blocks/dynunet_block.py
+++ b/monai/networks/blocks/dynunet_block.py
@@ -33,6 +33,7 @@ class UnetResBlock(nn.Module):
         kernel_size: convolution kernel size.
         stride: convolution stride.
         norm_name: feature normalization type and arguments.
+        dropout: dropout probability
 
     """
 
@@ -44,14 +45,16 @@ def __init__(
         kernel_size: Union[Sequence[int], int],
         stride: Union[Sequence[int], int],
         norm_name: Union[Tuple, str],
+        dropout: Optional[Union[Tuple, str, float]] = None,
     ):
-        super(UnetResBlock, self).__init__()
+        super().__init__()
         self.conv1 = get_conv_layer(
             spatial_dims,
             in_channels,
             out_channels,
             kernel_size=kernel_size,
             stride=stride,
+            dropout=dropout,
             conv_only=True,
         )
         self.conv2 = get_conv_layer(
@@ -60,6 +63,7 @@ def __init__(
             out_channels,
             kernel_size=kernel_size,
             stride=1,
+            dropout=dropout,
             conv_only=True,
         )
         self.conv3 = get_conv_layer(
@@ -68,6 +72,7 @@ def __init__(
             out_channels,
             kernel_size=1,
             stride=stride,
+            dropout=dropout,
             conv_only=True,
         )
         self.lrelu = get_act_layer(("leakyrelu", {"inplace": True, "negative_slope": 0.01}))
@@ -107,6 +112,7 @@ class UnetBasicBlock(nn.Module):
         kernel_size: convolution kernel size.
         stride: convolution stride.
         norm_name: feature normalization type and arguments.
+        dropout: dropout probability
 
     """
 
@@ -118,14 +124,16 @@ def __init__(
         kernel_size: Union[Sequence[int], int],
         stride: Union[Sequence[int], int],
         norm_name: Union[Tuple, str],
+        dropout: Optional[Union[Tuple, str, float]] = None,
     ):
-        super(UnetBasicBlock, self).__init__()
+        super().__init__()
         self.conv1 = get_conv_layer(
             spatial_dims,
             in_channels,
             out_channels,
             kernel_size=kernel_size,
             stride=stride,
+            dropout=dropout,
             conv_only=True,
         )
         self.conv2 = get_conv_layer(
@@ -134,6 +142,7 @@ def __init__(
             out_channels,
             kernel_size=kernel_size,
             stride=1,
+            dropout=dropout,
             conv_only=True,
         )
         self.lrelu = get_act_layer(("leakyrelu", {"inplace": True, "negative_slope": 0.01}))
@@ -164,6 +173,7 @@ class UnetUpBlock(nn.Module):
         stride: convolution stride.
         upsample_kernel_size: convolution kernel size for transposed convolution layers.
         norm_name: feature normalization type and arguments.
+        dropout: dropout probability
 
     """
 
@@ -176,8 +186,9 @@ def __init__(
         stride: Union[Sequence[int], int],
         upsample_kernel_size: Union[Sequence[int], int],
         norm_name: Union[Tuple, str],
+        dropout: Optional[Union[Tuple, str, float]] = None,
     ):
-        super(UnetUpBlock, self).__init__()
+        super().__init__()
         upsample_stride = upsample_kernel_size
         self.transp_conv = get_conv_layer(
             spatial_dims,
@@ -185,6 +196,7 @@ def __init__(
             out_channels,
             kernel_size=upsample_kernel_size,
             stride=upsample_stride,
+            dropout=dropout,
             conv_only=True,
             is_transposed=True,
         )
@@ -194,6 +206,7 @@ def __init__(
             out_channels,
             kernel_size=kernel_size,
             stride=1,
+            dropout=dropout,
             norm_name=norm_name,
         )
 
@@ -206,10 +219,12 @@ def forward(self, inp, skip):
 
 
 class UnetOutBlock(nn.Module):
-    def __init__(self, spatial_dims: int, in_channels: int, out_channels: int):
-        super(UnetOutBlock, self).__init__()
+    def __init__(
+        self, spatial_dims: int, in_channels: int, out_channels: int, dropout: Optional[Union[Tuple, str, float]] = None
+    ):
+        super().__init__()
         self.conv = get_conv_layer(
-            spatial_dims, in_channels, out_channels, kernel_size=1, stride=1, bias=True, conv_only=True
+            spatial_dims, in_channels, out_channels, kernel_size=1, stride=1, dropout=dropout, bias=True, conv_only=True
         )
 
     def forward(self, inp):
@@ -224,6 +239,7 @@ def get_conv_layer(
     stride: Union[Sequence[int], int] = 1,
     act: Optional[Union[Tuple, str]] = Act.PRELU,
     norm: Union[Tuple, str] = Norm.INSTANCE,
+    dropout: Optional[Union[Tuple, str, float]] = None,
     bias: bool = False,
     conv_only: bool = True,
     is_transposed: bool = False,
@@ -240,6 +256,7 @@ def get_conv_layer(
         kernel_size=kernel_size,
         act=act,
         norm=norm,
+        dropout=dropout,
         bias=bias,
         conv_only=conv_only,
         is_transposed=is_transposed,
diff --git a/monai/networks/blocks/dynunet_block_v1.py b/monai/networks/blocks/dynunet_block_v1.py
index d5d9bbf3dc..b5b88dd0df 100644
--- a/monai/networks/blocks/dynunet_block_v1.py
+++ b/monai/networks/blocks/dynunet_block_v1.py
@@ -32,6 +32,7 @@ def __init__(
         kernel_size: Union[Sequence[int], int],
         stride: Union[Sequence[int], int],
         norm_name: str,
+        dropout: float = 0.0,
     ):
         nn.Module.__init__(self)
         self.conv1 = get_conv_layer(
@@ -40,6 +41,7 @@ def __init__(
             out_channels,
             kernel_size=kernel_size,
             stride=stride,
+            dropout=dropout,
             conv_only=True,
         )
         self.conv2 = get_conv_layer(
@@ -48,6 +50,7 @@ def __init__(
             out_channels,
             kernel_size=kernel_size,
             stride=1,
+            dropout=dropout,
             conv_only=True,
         )
         self.conv3 = get_conv_layer(
@@ -56,6 +59,7 @@ def __init__(
             out_channels,
             kernel_size=1,
             stride=stride,
+            dropout=dropout,
             conv_only=True,
         )
         self.lrelu = get_act_layer(("leakyrelu", {"inplace": True, "negative_slope": 0.01}))
@@ -81,6 +85,7 @@ def __init__(
         kernel_size: Union[Sequence[int], int],
         stride: Union[Sequence[int], int],
         norm_name: str,
+        dropout: float = 0.0,
     ):
         nn.Module.__init__(self)
         self.conv1 = get_conv_layer(
@@ -89,6 +94,7 @@ def __init__(
             out_channels,
             kernel_size=kernel_size,
             stride=stride,
+            dropout=dropout,
             conv_only=True,
         )
         self.conv2 = get_conv_layer(
@@ -97,6 +103,7 @@ def __init__(
             out_channels,
             kernel_size=kernel_size,
             stride=1,
+            dropout=dropout,
             conv_only=True,
         )
         self.lrelu = get_act_layer(("leakyrelu", {"inplace": True, "negative_slope": 0.01}))
@@ -118,6 +125,7 @@ def __init__(
         stride: Union[Sequence[int], int],
         upsample_kernel_size: Union[Sequence[int], int],
         norm_name: str,
+        dropout: float = 0.0,
     ):
         nn.Module.__init__(self)
         upsample_stride = upsample_kernel_size
@@ -127,6 +135,7 @@ def __init__(
             out_channels,
             kernel_size=upsample_kernel_size,
             stride=upsample_stride,
+            dropout=dropout,
             conv_only=True,
             is_transposed=True,
         )
@@ -137,6 +146,7 @@ def __init__(
             kernel_size=kernel_size,
             stride=1,
             norm_name=norm_name,
+            dropout=dropout,
         )
 
 
diff --git a/monai/networks/blocks/fcn.py b/monai/networks/blocks/fcn.py
index d84e506774..1170dfa7e3 100644
--- a/monai/networks/blocks/fcn.py
+++ b/monai/networks/blocks/fcn.py
@@ -36,7 +36,7 @@ def __init__(self, inplanes: int, planes: int, ks: int = 7):
             planes: number of output channels.
             ks: kernel size for one dimension. Defaults to 7.
         """
-        super(GCN, self).__init__()
+        super().__init__()
 
         conv2d_type: Type[nn.Conv2d] = Conv[Conv.CONV, 2]
         self.conv_l1 = conv2d_type(in_channels=inplanes, out_channels=planes, kernel_size=(ks, 1), padding=(ks // 2, 0))
@@ -67,7 +67,7 @@ def __init__(self, planes: int):
         Args:
             planes: number of input channels.
         """
-        super(Refine, self).__init__()
+        super().__init__()
 
         relu_type: Type[nn.ReLU] = Act[Act.RELU]
         conv2d_type: Type[nn.Conv2d] = Conv[Conv.CONV, 2]
@@ -116,7 +116,7 @@ class FCN(nn.Module):
     def __init__(
         self, out_channels: int = 1, upsample_mode: str = "bilinear", pretrained: bool = True, progress: bool = True
     ):
-        super(FCN, self).__init__()
+        super().__init__()
 
         conv2d_type: Type[nn.Conv2d] = Conv[Conv.CONV, 2]
 
@@ -155,7 +155,7 @@ def __init__(
 
         if self.upsample_mode == "transpose":
             self.up_conv = UpSample(
-                dimensions=2,
+                spatial_dims=2,
                 in_channels=self.out_channels,
                 scale_factor=2,
                 mode="deconv",
@@ -231,12 +231,12 @@ def __init__(
         pretrained: bool = True,
         progress: bool = True,
     ):
-        super(MCFCN, self).__init__(
+        super().__init__(
             out_channels=out_channels, upsample_mode=upsample_mode, pretrained=pretrained, progress=progress
         )
 
         self.init_proj = Convolution(
-            dimensions=2,
+            spatial_dims=2,
             in_channels=in_channels,
             out_channels=3,
             kernel_size=1,
@@ -251,4 +251,4 @@ def forward(self, x: torch.Tensor):
             x: in shape (batch, in_channels, spatial_1, spatial_2).
         """
         x = self.init_proj(x)
-        return super(MCFCN, self).forward(x)
+        return super().forward(x)
diff --git a/monai/networks/blocks/localnet_block.py b/monai/networks/blocks/localnet_block.py
index 3997d42436..844c5c0eea 100644
--- a/monai/networks/blocks/localnet_block.py
+++ b/monai/networks/blocks/localnet_block.py
@@ -29,7 +29,7 @@ def get_conv_block(
     norm: Optional[Union[Tuple, str]] = "BATCH",
 ) -> nn.Module:
     padding = same_padding(kernel_size)
-    return Convolution(
+    mod: nn.Module = Convolution(
         spatial_dims,
         in_channels,
         out_channels,
@@ -40,6 +40,7 @@ def get_conv_block(
         conv_only=False,
         padding=padding,
     )
+    return mod
 
 
 def get_conv_layer(
@@ -49,7 +50,7 @@ def get_conv_layer(
     kernel_size: Union[Sequence[int], int] = 3,
 ) -> nn.Module:
     padding = same_padding(kernel_size)
-    return Convolution(
+    mod: nn.Module = Convolution(
         spatial_dims,
         in_channels,
         out_channels,
@@ -58,6 +59,7 @@ def get_conv_layer(
         conv_only=True,
         padding=padding,
     )
+    return mod
 
 
 def get_deconv_block(
@@ -65,8 +67,8 @@ def get_deconv_block(
     in_channels: int,
     out_channels: int,
 ) -> nn.Module:
-    return Convolution(
-        dimensions=spatial_dims,
+    mod: nn.Module = Convolution(
+        spatial_dims=spatial_dims,
         in_channels=in_channels,
         out_channels=out_channels,
         strides=2,
@@ -77,6 +79,7 @@ def get_deconv_block(
         padding=1,
         output_padding=1,
     )
+    return mod
 
 
 class ResidualBlock(nn.Module):
@@ -87,7 +90,7 @@ def __init__(
         out_channels: int,
         kernel_size: Union[Sequence[int], int],
     ) -> None:
-        super(ResidualBlock, self).__init__()
+        super().__init__()
         if in_channels != out_channels:
             raise ValueError(
                 f"expecting in_channels == out_channels, " f"got in_channels={in_channels}, out_channels={out_channels}"
@@ -116,7 +119,7 @@ def __init__(
         in_channels: int,
         out_channels: int,
     ) -> None:
-        super(LocalNetResidualBlock, self).__init__()
+        super().__init__()
         if in_channels != out_channels:
             raise ValueError(
                 f"expecting in_channels == out_channels, " f"got in_channels={in_channels}, out_channels={out_channels}"
@@ -162,7 +165,7 @@ def __init__(
         Raises:
             NotImplementedError: when ``kernel_size`` is even
         """
-        super(LocalNetDownSampleBlock, self).__init__()
+        super().__init__()
         self.conv_block = get_conv_block(
             spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size
         )
@@ -222,7 +225,7 @@ def __init__(
         Raises:
             ValueError: when ``in_channels != 2 * out_channels``
         """
-        super(LocalNetUpSampleBlock, self).__init__()
+        super().__init__()
         self.deconv_block = get_deconv_block(
             spatial_dims=spatial_dims,
             in_channels=in_channels,
@@ -306,7 +309,7 @@ def __init__(
         act: activation type and arguments. Defaults to ReLU.
         kernel_initializer: kernel initializer. Defaults to None.
         """
-        super(LocalNetFeatureExtractorBlock, self).__init__()
+        super().__init__()
         self.conv_block = get_conv_block(
             spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels, act=act, norm=None
         )
diff --git a/monai/networks/blocks/patchembedding.py b/monai/networks/blocks/patchembedding.py
index c1fcfa9af7..8170106dec 100644
--- a/monai/networks/blocks/patchembedding.py
+++ b/monai/networks/blocks/patchembedding.py
@@ -62,7 +62,7 @@ def __init__(
 
         """
 
-        super(PatchEmbeddingBlock, self).__init__()
+        super().__init__()
 
         if not (0 <= dropout_rate <= 1):
             raise ValueError("dropout_rate should be between 0 and 1.")
diff --git a/monai/networks/blocks/regunet_block.py b/monai/networks/blocks/regunet_block.py
index d2cd3518b9..18770a0d48 100644
--- a/monai/networks/blocks/regunet_block.py
+++ b/monai/networks/blocks/regunet_block.py
@@ -32,7 +32,7 @@ def get_conv_block(
 ) -> nn.Module:
     if padding is None:
         padding = same_padding(kernel_size)
-    conv_block = Convolution(
+    conv_block: nn.Module = Convolution(
         spatial_dims,
         in_channels,
         out_channels,
@@ -65,7 +65,7 @@ def get_conv_layer(
     kernel_size: Union[Sequence[int], int] = 3,
 ) -> nn.Module:
     padding = same_padding(kernel_size)
-    return Convolution(
+    mod: nn.Module = Convolution(
         spatial_dims,
         in_channels,
         out_channels,
@@ -74,6 +74,7 @@ def get_conv_layer(
         conv_only=True,
         padding=padding,
     )
+    return mod
 
 
 class RegistrationResidualConvBlock(nn.Module):
@@ -99,7 +100,7 @@ def __init__(
             num_layers: number of layers inside the block
             kernel_size: kernel_size
         """
-        super(RegistrationResidualConvBlock, self).__init__()
+        super().__init__()
         self.num_layers = num_layers
         self.layers = nn.ModuleList(
             [
@@ -157,7 +158,7 @@ def __init__(
             channels: channels
             pooling: use MaxPool if True, strided conv if False
         """
-        super(RegistrationDownSampleBlock, self).__init__()
+        super().__init__()
         if pooling:
             self.layer = Pool[Pool.MAX, spatial_dims](kernel_size=2)
         else:
@@ -193,8 +194,8 @@ def get_deconv_block(
     in_channels: int,
     out_channels: int,
 ) -> nn.Module:
-    return Convolution(
-        dimensions=spatial_dims,
+    mod: nn.Module = Convolution(
+        spatial_dims=spatial_dims,
         in_channels=in_channels,
         out_channels=out_channels,
         strides=2,
@@ -205,6 +206,7 @@ def get_deconv_block(
         padding=1,
         output_padding=1,
     )
+    return mod
 
 
 class RegistrationExtractionBlock(nn.Module):
@@ -233,7 +235,7 @@ def __init__(
             kernel_initializer: kernel initializer
             activation: kernel activation function
         """
-        super(RegistrationExtractionBlock, self).__init__()
+        super().__init__()
         self.extract_levels = extract_levels
         self.max_level = max(extract_levels)
         self.layers = nn.ModuleList(
diff --git a/monai/networks/blocks/segresnet_block.py b/monai/networks/blocks/segresnet_block.py
index d8f6d7b268..4d2a5b0623 100644
--- a/monai/networks/blocks/segresnet_block.py
+++ b/monai/networks/blocks/segresnet_block.py
@@ -39,7 +39,7 @@ def get_upsample_layer(
     spatial_dims: int, in_channels: int, upsample_mode: Union[UpsampleMode, str] = "nontrainable", scale_factor: int = 2
 ):
     return UpSample(
-        dimensions=spatial_dims,
+        spatial_dims=spatial_dims,
         in_channels=in_channels,
         out_channels=in_channels,
         scale_factor=scale_factor,
diff --git a/monai/networks/blocks/selfattention.py b/monai/networks/blocks/selfattention.py
index 9dc45cccc8..649f757bf8 100644
--- a/monai/networks/blocks/selfattention.py
+++ b/monai/networks/blocks/selfattention.py
@@ -37,7 +37,7 @@ def __init__(
 
         """
 
-        super(SABlock, self).__init__()
+        super().__init__()
 
         if not (0 <= dropout_rate <= 1):
             raise ValueError("dropout_rate should be between 0 and 1.")
diff --git a/monai/networks/blocks/squeeze_and_excitation.py b/monai/networks/blocks/squeeze_and_excitation.py
index 4db6dc30f7..46cd48d6aa 100644
--- a/monai/networks/blocks/squeeze_and_excitation.py
+++ b/monai/networks/blocks/squeeze_and_excitation.py
@@ -50,7 +50,7 @@ def __init__(
             :py:class:`monai.networks.layers.Act`
 
         """
-        super(ChannelSELayer, self).__init__()
+        super().__init__()
 
         self.add_residual = add_residual
 
@@ -181,21 +181,21 @@ def __init__(
             :py:class:`monai.networks.blocks.ChannelSELayer`
 
         """
-        super(SEBlock, self).__init__()
+        super().__init__()
 
         if not conv_param_1:
             conv_param_1 = {"kernel_size": 1, "norm": Norm.BATCH, "act": ("relu", {"inplace": True})}
         self.conv1 = Convolution(
-            dimensions=spatial_dims, in_channels=in_channels, out_channels=n_chns_1, **conv_param_1
+            spatial_dims=spatial_dims, in_channels=in_channels, out_channels=n_chns_1, **conv_param_1
         )
 
         if not conv_param_2:
             conv_param_2 = {"kernel_size": 3, "norm": Norm.BATCH, "act": ("relu", {"inplace": True})}
-        self.conv2 = Convolution(dimensions=spatial_dims, in_channels=n_chns_1, out_channels=n_chns_2, **conv_param_2)
+        self.conv2 = Convolution(spatial_dims=spatial_dims, in_channels=n_chns_1, out_channels=n_chns_2, **conv_param_2)
 
         if not conv_param_3:
             conv_param_3 = {"kernel_size": 1, "norm": Norm.BATCH, "act": None}
-        self.conv3 = Convolution(dimensions=spatial_dims, in_channels=n_chns_2, out_channels=n_chns_3, **conv_param_3)
+        self.conv3 = Convolution(spatial_dims=spatial_dims, in_channels=n_chns_2, out_channels=n_chns_3, **conv_param_3)
 
         self.se_layer = ChannelSELayer(
             spatial_dims=spatial_dims, in_channels=n_chns_3, r=r, acti_type_1=acti_type_1, acti_type_2=acti_type_2
@@ -264,7 +264,7 @@ def __init__(
         }
         conv_param_3 = {"strides": 1, "kernel_size": 1, "act": None, "norm": Norm.BATCH, "bias": False}
 
-        super(SEBottleneck, self).__init__(
+        super().__init__(
             spatial_dims=spatial_dims,
             in_channels=inplanes,
             n_chns_1=planes * 2,
@@ -315,7 +315,7 @@ def __init__(
         }
         conv_param_3 = {"strides": 1, "kernel_size": 1, "act": None, "norm": Norm.BATCH, "bias": False}
 
-        super(SEResNetBottleneck, self).__init__(
+        super().__init__(
             spatial_dims=spatial_dims,
             in_channels=inplanes,
             n_chns_1=planes,
diff --git a/monai/networks/blocks/unetr_block.py b/monai/networks/blocks/unetr_block.py
index a0852d05e0..ccc055e889 100644
--- a/monai/networks/blocks/unetr_block.py
+++ b/monai/networks/blocks/unetr_block.py
@@ -46,7 +46,7 @@ def __init__(
 
         """
 
-        super(UnetrUpBlock, self).__init__()
+        super().__init__()
         upsample_stride = upsample_kernel_size
         self.transp_conv = get_conv_layer(
             spatial_dims,
diff --git a/monai/networks/blocks/upsample.py b/monai/networks/blocks/upsample.py
index 5320611ce6..a6aa13dde4 100644
--- a/monai/networks/blocks/upsample.py
+++ b/monai/networks/blocks/upsample.py
@@ -16,7 +16,7 @@
 
 from monai.networks.layers.factories import Conv, Pad, Pool
 from monai.networks.utils import icnr_init, pixelshuffle
-from monai.utils import InterpolateMode, UpsampleMode, ensure_tuple_rep, look_up_option
+from monai.utils import InterpolateMode, UpsampleMode, deprecated_arg, ensure_tuple_rep, look_up_option
 
 __all__ = ["Upsample", "UpSample", "SubpixelUpsample", "Subpixelupsample", "SubpixelUpSample"]
 
@@ -34,9 +34,12 @@ class UpSample(nn.Sequential):
     (often used to map the number of features from `in_channels` to `out_channels`).
     """
 
+    @deprecated_arg(
+        name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead."
+    )
     def __init__(
         self,
-        dimensions: int,
+        spatial_dims: int,
         in_channels: Optional[int] = None,
         out_channels: Optional[int] = None,
         scale_factor: Union[Sequence[float], float] = 2,
@@ -47,10 +50,11 @@ def __init__(
         align_corners: Optional[bool] = True,
         bias: bool = True,
         apply_pad_pool: bool = True,
+        dimensions: Optional[int] = None,
     ) -> None:
         """
         Args:
-            dimensions: number of spatial dimensions of the input image.
+            spatial_dims: number of spatial dimensions of the input image.
             in_channels: number of channels of the input image.
             out_channels: number of channels of the output image. Defaults to `in_channels`.
             scale_factor: multiplier for spatial size. Has to match input size if it is a tuple. Defaults to 2.
@@ -75,16 +79,21 @@ def __init__(
             apply_pad_pool: if True the upsampled tensor is padded then average pooling is applied with a kernel the
                 size of `scale_factor` with a stride of 1. See also: :py:class:`monai.networks.blocks.SubpixelUpsample`.
                 Only used in the "pixelshuffle" mode.
+
+        .. deprecated:: 0.6.0
+            ``dimensions`` is deprecated, use ``spatial_dims`` instead.
         """
         super().__init__()
-        scale_factor_ = ensure_tuple_rep(scale_factor, dimensions)
+        if dimensions is not None:
+            spatial_dims = dimensions
+        scale_factor_ = ensure_tuple_rep(scale_factor, spatial_dims)
         up_mode = look_up_option(mode, UpsampleMode)
         if up_mode == UpsampleMode.DECONV:
             if not in_channels:
                 raise ValueError(f"in_channels needs to be specified in the '{mode}' mode.")
             self.add_module(
                 "deconv",
-                Conv[Conv.CONVTRANS, dimensions](
+                Conv[Conv.CONVTRANS, spatial_dims](
                     in_channels=in_channels,
                     out_channels=out_channels or in_channels,
                     kernel_size=scale_factor_,
@@ -98,7 +107,7 @@ def __init__(
                     raise ValueError(f"in_channels needs to be specified in the '{mode}' mode.")
                 self.add_module(
                     "preconv",
-                    Conv[Conv.CONV, dimensions](
+                    Conv[Conv.CONV, spatial_dims](
                         in_channels=in_channels, out_channels=out_channels or in_channels, kernel_size=1, bias=bias
                     ),
                 )
@@ -112,7 +121,7 @@ def __init__(
             interp_mode = InterpolateMode(interp_mode)
             linear_mode = [InterpolateMode.LINEAR, InterpolateMode.BILINEAR, InterpolateMode.TRILINEAR]
             if interp_mode in linear_mode:  # choose mode based on dimensions
-                interp_mode = linear_mode[dimensions - 1]
+                interp_mode = linear_mode[spatial_dims - 1]
             self.add_module(
                 "upsample_non_trainable",
                 nn.Upsample(
@@ -126,7 +135,7 @@ def __init__(
             self.add_module(
                 "pixelshuffle",
                 SubpixelUpsample(
-                    dimensions=dimensions,
+                    spatial_dims=spatial_dims,
                     in_channels=in_channels,
                     out_channels=out_channels,
                     scale_factor=scale_factor_[0],  # isotropic
@@ -164,19 +173,23 @@ class SubpixelUpsample(nn.Module):
 
     """
 
+    @deprecated_arg(
+        name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead."
+    )
     def __init__(
         self,
-        dimensions: int,
+        spatial_dims: int,
         in_channels: Optional[int],
         out_channels: Optional[int] = None,
         scale_factor: int = 2,
         conv_block: Optional[Union[nn.Module, str]] = "default",
         apply_pad_pool: bool = True,
         bias: bool = True,
+        dimensions: Optional[int] = None,
     ) -> None:
         """
         Args:
-            dimensions: number of spatial dimensions of the input image.
+            spatial_dims: number of spatial dimensions of the input image.
             in_channels: number of channels of the input image.
             out_channels: optional number of channels of the output image.
             scale_factor: multiplier for spatial size. Defaults to 2.
@@ -190,21 +203,24 @@ def __init__(
                 size of `scale_factor` with a stride of 1. This implements the nearest neighbour resize convolution
                 component of subpixel convolutions described in Aitken et al.
             bias: whether to have a bias term in the default conv_block. Defaults to True.
+
+        .. deprecated:: 0.6.0
+            ``dimensions`` is deprecated, use ``spatial_dims`` instead.
         """
         super().__init__()
 
         if scale_factor <= 0:
             raise ValueError(f"The `scale_factor` multiplier must be an integer greater than 0, got {scale_factor}.")
 
-        self.dimensions = dimensions
+        self.dimensions = spatial_dims if dimensions is None else dimensions
         self.scale_factor = scale_factor
 
         if conv_block == "default":
             out_channels = out_channels or in_channels
             if not out_channels:
                 raise ValueError("in_channels need to be specified.")
-            conv_out_channels = out_channels * (scale_factor ** dimensions)
-            self.conv_block = Conv[Conv.CONV, dimensions](
+            conv_out_channels = out_channels * (scale_factor ** self.dimensions)
+            self.conv_block = Conv[Conv.CONV, self.dimensions](
                 in_channels=in_channels, out_channels=conv_out_channels, kernel_size=3, stride=1, padding=1, bias=bias
             )
 
diff --git a/monai/networks/blocks/warp.py b/monai/networks/blocks/warp.py
index d916c026ff..ccdfdb964e 100644
--- a/monai/networks/blocks/warp.py
+++ b/monai/networks/blocks/warp.py
@@ -50,7 +50,7 @@ def __init__(
 
         See also: :py:class:`monai.networks.layers.grid_pull`
         """
-        super(Warp, self).__init__()
+        super().__init__()
         # resolves _interp_mode for different methods
 
         if USE_COMPILED:
@@ -148,7 +148,7 @@ def __init__(
         mode=GridSampleMode.BILINEAR.value,
         padding_mode=GridSamplePadMode.ZEROS.value,
     ):
-        super(DVF2DDF, self).__init__()
+        super().__init__()
         if num_steps <= 0:
             raise ValueError(f"expecting positive num_steps, got {num_steps}")
         self.num_steps = num_steps
diff --git a/monai/networks/layers/simplelayers.py b/monai/networks/layers/simplelayers.py
index 52f19aab29..0f9f78b4be 100644
--- a/monai/networks/layers/simplelayers.py
+++ b/monai/networks/layers/simplelayers.py
@@ -10,6 +10,7 @@
 # limitations under the License.
 
 import math
+from copy import deepcopy
 from typing import List, Sequence, Union
 
 import torch
@@ -24,10 +25,10 @@
     ChannelMatching,
     InvalidPyTorchVersionError,
     SkipMode,
-    ensure_tuple_rep,
     look_up_option,
     optional_import,
 )
+from monai.utils.misc import issequenceiterable
 
 _C, _ = optional_import("monai._C")
 if not PT_BEFORE_1_7:
@@ -393,13 +394,18 @@ def __init__(
                 (for example `parameters()` iterator could be used to get the parameters);
                 otherwise this module will fix the kernels using `sigma` as the std.
         """
+        if issequenceiterable(sigma):
+            if len(sigma) != spatial_dims:  # type: ignore
+                raise ValueError
+        else:
+            sigma = [deepcopy(sigma) for _ in range(spatial_dims)]  # type: ignore
         super().__init__()
         self.sigma = [
             torch.nn.Parameter(
                 torch.as_tensor(s, dtype=torch.float, device=s.device if isinstance(s, torch.Tensor) else None),
                 requires_grad=requires_grad,
             )
-            for s in ensure_tuple_rep(sigma, int(spatial_dims))
+            for s in sigma  # type: ignore
         ]
         self.truncated = truncated
         self.approx = approx
@@ -449,7 +455,7 @@ class LLTM(nn.Module):
     """
 
     def __init__(self, input_features: int, state_size: int):
-        super(LLTM, self).__init__()
+        super().__init__()
         self.input_features = input_features
         self.state_size = state_size
         self.weights = nn.Parameter(torch.empty(3 * state_size, input_features + state_size))
diff --git a/monai/networks/layers/spatial_transforms.py b/monai/networks/layers/spatial_transforms.py
index 511c24fcb0..6b5acb166a 100644
--- a/monai/networks/layers/spatial_transforms.py
+++ b/monai/networks/layers/spatial_transforms.py
@@ -46,7 +46,9 @@ def backward(ctx, grad):
             return None, grads[0], None, None, None
 
 
-def grid_pull(input: torch.Tensor, grid: torch.Tensor, interpolation="linear", bound="zero", extrapolate: bool = True):
+def grid_pull(
+    input: torch.Tensor, grid: torch.Tensor, interpolation="linear", bound="zero", extrapolate: bool = True
+) -> torch.Tensor:
     """
     Sample an image with respect to a deformation field.
 
@@ -112,8 +114,9 @@ def grid_pull(input: torch.Tensor, grid: torch.Tensor, interpolation="linear", b
         _C.InterpolationType.__members__[i] if isinstance(i, str) else _C.InterpolationType(i)
         for i in ensure_tuple(interpolation)
     ]
-
-    return _GridPull.apply(input, grid, interpolation, bound, extrapolate)
+    out: torch.Tensor
+    out = _GridPull.apply(input, grid, interpolation, bound, extrapolate)  # type: ignore
+    return out
 
 
 class _GridPush(torch.autograd.Function):
diff --git a/monai/networks/nets/__init__.py b/monai/networks/nets/__init__.py
index ad1ca2418b..6076fcbe3d 100644
--- a/monai/networks/nets/__init__.py
+++ b/monai/networks/nets/__init__.py
@@ -80,6 +80,7 @@
     seresnext101,
 )
 from .torchvision_fc import TorchVisionFCModel, TorchVisionFullyConvModel
+from .transchex import BertAttention, BertMixedLayer, BertOutput, BertPreTrainedModel, MultiModal, Pooler, Transchex
 from .unet import UNet, Unet, unet
 from .unetr import UNETR
 from .varautoencoder import VarAutoEncoder
diff --git a/monai/networks/nets/ahnet.py b/monai/networks/nets/ahnet.py
index 5ca6813efe..0a3f938b96 100644
--- a/monai/networks/nets/ahnet.py
+++ b/monai/networks/nets/ahnet.py
@@ -35,7 +35,7 @@ def __init__(
         downsample: Optional[nn.Sequential] = None,
     ) -> None:
 
-        super(Bottleneck3x3x1, self).__init__()
+        super().__init__()
 
         conv_type = Conv[Conv.CONV, spatial_dims]
         norm_type: Type[Union[nn.BatchNorm2d, nn.BatchNorm3d]] = Norm[Norm.BATCH, spatial_dims]
@@ -87,7 +87,7 @@ def forward(self, x):
 
 class Projection(nn.Sequential):
     def __init__(self, spatial_dims: int, num_input_features: int, num_output_features: int):
-        super(Projection, self).__init__()
+        super().__init__()
 
         conv_type = Conv[Conv.CONV, spatial_dims]
         norm_type: Type[Union[nn.BatchNorm2d, nn.BatchNorm3d]] = Norm[Norm.BATCH, spatial_dims]
@@ -108,7 +108,7 @@ def __init__(
         growth_rate: int,
         dropout_prob: float,
     ):
-        super(DenseBlock, self).__init__()
+        super().__init__()
         for i in range(num_layers):
             layer = Pseudo3DLayer(
                 spatial_dims, num_input_features + i * growth_rate, growth_rate, bn_size, dropout_prob
@@ -120,7 +120,7 @@ class UpTransition(nn.Sequential):
     def __init__(
         self, spatial_dims: int, num_input_features: int, num_output_features: int, upsample_mode: str = "transpose"
     ):
-        super(UpTransition, self).__init__()
+        super().__init__()
 
         conv_type = Conv[Conv.CONV, spatial_dims]
         norm_type: Type[Union[nn.BatchNorm2d, nn.BatchNorm3d]] = Norm[Norm.BATCH, spatial_dims]
@@ -145,7 +145,7 @@ class Final(nn.Sequential):
     def __init__(
         self, spatial_dims: int, num_input_features: int, num_output_features: int, upsample_mode: str = "transpose"
     ):
-        super(Final, self).__init__()
+        super().__init__()
 
         conv_type = Conv[Conv.CONV, spatial_dims]
         norm_type: Type[Union[nn.BatchNorm2d, nn.BatchNorm3d]] = Norm[Norm.BATCH, spatial_dims]
@@ -178,7 +178,7 @@ def __init__(
 
 class Pseudo3DLayer(nn.Module):
     def __init__(self, spatial_dims: int, num_input_features: int, growth_rate: int, bn_size: int, dropout_prob: float):
-        super(Pseudo3DLayer, self).__init__()
+        super().__init__()
         # 1x1x1
 
         conv_type = Conv[Conv.CONV, spatial_dims]
@@ -244,7 +244,7 @@ def forward(self, x):
 
 class PSP(nn.Module):
     def __init__(self, spatial_dims: int, psp_block_num: int, in_ch: int, upsample_mode: str = "transpose"):
-        super(PSP, self).__init__()
+        super().__init__()
         self.up_modules = nn.ModuleList()
         conv_type = Conv[Conv.CONV, spatial_dims]
         pool_type: Type[Union[nn.MaxPool2d, nn.MaxPool3d]] = Pool[Pool.MAX, spatial_dims]
@@ -356,7 +356,7 @@ def __init__(
         progress: bool = True,
     ):
         self.inplanes = 64
-        super(AHNet, self).__init__()
+        super().__init__()
 
         conv_type = Conv[Conv.CONV, spatial_dims]
         conv_trans_type = Conv[Conv.CONVTRANS, spatial_dims]
diff --git a/monai/networks/nets/autoencoder.py b/monai/networks/nets/autoencoder.py
index d0a54b8148..f4a0451dc7 100644
--- a/monai/networks/nets/autoencoder.py
+++ b/monai/networks/nets/autoencoder.py
@@ -16,14 +16,84 @@
 
 from monai.networks.blocks import Convolution, ResidualUnit
 from monai.networks.layers.factories import Act, Norm
+from monai.utils import deprecated_arg
 
 __all__ = ["AutoEncoder"]
 
 
 class AutoEncoder(nn.Module):
+    """
+    Simple definition of an autoencoder and base class for the architecture implementing
+    :py:class:`monai.networks.nets.VarAutoEncoder`. The network is composed of an encode sequence of blocks, followed
+    by an intermediary sequence of blocks, and finally a decode sequence of blocks. The encode and decode blocks are
+    default :py:class:`monai.networks.blocks.Convolution` instances with the encode blocks having the given stride
+    and the decode blocks having transpose convolutions with the same stride. If `num_res_units` is given residual
+    blocks are used instead.
+
+    By default the intermediary sequence is empty but if `inter_channels` is given to specify the output channels of
+    blocks then this will be become a sequence of Convolution blocks or of residual blocks if `num_inter_units` is
+    given. The optional parameter `inter_dilations` can be used to specify the dilation values of the convolutions in
+    these blocks, this allows a network to use dilated kernels in this  middle section. Since the intermediary section
+    isn't meant to change the size of the output the strides for all these kernels is 1.
+
+    Args:
+        spatial_dims: number of spatial dimensions.
+        in_channels: number of input channels.
+        out_channels: number of output channels.
+        channels: sequence of channels. Top block first. The length of `channels` should be no less than 2.
+        strides: sequence of convolution strides. The length of `stride` should equal to `len(channels) - 1`.
+        kernel_size: convolution kernel size, the value(s) should be odd. If sequence,
+            its length should equal to dimensions. Defaults to 3.
+        up_kernel_size: upsampling convolution kernel size, the value(s) should be odd. If sequence,
+            its length should equal to dimensions. Defaults to 3.
+        num_res_units: number of residual units. Defaults to 0.
+        inter_channels: sequence of channels defining the blocks in the intermediate layer between encode and decode.
+        inter_dilations: defines the dilation value for each block of the intermediate layer. Defaults to 1.
+        num_inter_units: number of residual units for each block of the intermediate layer. Defaults to 0.
+        act: activation type and arguments. Defaults to PReLU.
+        norm: feature normalization type and arguments. Defaults to instance norm.
+        dropout: dropout ratio. Defaults to no dropout.
+        bias: whether to have a bias term in convolution blocks. Defaults to True.
+            According to `Performance Tuning Guide <https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html>`_,
+            if a conv layer is directly followed by a batch norm layer, bias should be False.
+
+    .. deprecated:: 0.6.0
+        ``dimensions`` is deprecated, use ``spatial_dims`` instead.
+
+    Examples::
+
+        from monai.networks.nets import AutoEncoder
+
+        # 3 layers each down/up sampling their inputs by a factor 2 with no intermediate layer
+        net = AutoEncoder(
+            spatial_dims=2,
+            in_channels=1,
+            out_channels=1,
+            channels=(2, 4, 8),
+            strides=(2, 2, 2)
+        )
+
+        # 1 layer downsampling by 2, followed by a sequence of residual units with 2 convolutions defined by
+        # progressively increasing dilations, then final upsample layer
+        net = AutoEncoder(
+                spatial_dims=2,
+                in_channels=1,
+                out_channels=1,
+                channels=(4,),
+                strides=(2,),
+                inter_channels=(8, 8, 8),
+                inter_dilations=(1, 2, 4),
+                num_inter_units=2
+            )
+
+    """
+
+    @deprecated_arg(
+        name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead."
+    )
     def __init__(
         self,
-        dimensions: int,
+        spatial_dims: int,
         in_channels: int,
         out_channels: int,
         channels: Sequence[int],
@@ -38,10 +108,11 @@ def __init__(
         norm: Union[Tuple, str] = Norm.INSTANCE,
         dropout: Optional[Union[Tuple, str, float]] = None,
         bias: bool = True,
+        dimensions: Optional[int] = None,
     ) -> None:
 
         super().__init__()
-        self.dimensions = dimensions
+        self.dimensions = spatial_dims if dimensions is None else dimensions
         self.in_channels = in_channels
         self.out_channels = out_channels
         self.channels = list(channels)
@@ -71,6 +142,9 @@ def __init__(
     def _get_encode_module(
         self, in_channels: int, channels: Sequence[int], strides: Sequence[int]
     ) -> Tuple[nn.Sequential, int]:
+        """
+        Returns the encode part of the network by building up a sequence of layers returned by `_get_encode_layer`.
+        """
         encode = nn.Sequential()
         layer_channels = in_channels
 
@@ -82,6 +156,10 @@ def _get_encode_module(
         return encode, layer_channels
 
     def _get_intermediate_module(self, in_channels: int, num_inter_units: int) -> Tuple[nn.Module, int]:
+        """
+        Returns the intermediate block of the network which accepts input from the encoder and whose output goes
+        to the decoder.
+        """
         # Define some types
         intermediate: nn.Module
         unit: nn.Module
@@ -95,7 +173,7 @@ def _get_intermediate_module(self, in_channels: int, num_inter_units: int) -> Tu
             for i, (dc, di) in enumerate(zip(self.inter_channels, self.inter_dilations)):
                 if self.num_inter_units > 0:
                     unit = ResidualUnit(
-                        dimensions=self.dimensions,
+                        spatial_dims=self.dimensions,
                         in_channels=layer_channels,
                         out_channels=dc,
                         strides=1,
@@ -109,7 +187,7 @@ def _get_intermediate_module(self, in_channels: int, num_inter_units: int) -> Tu
                     )
                 else:
                     unit = Convolution(
-                        dimensions=self.dimensions,
+                        spatial_dims=self.dimensions,
                         in_channels=layer_channels,
                         out_channels=dc,
                         strides=1,
@@ -129,6 +207,9 @@ def _get_intermediate_module(self, in_channels: int, num_inter_units: int) -> Tu
     def _get_decode_module(
         self, in_channels: int, channels: Sequence[int], strides: Sequence[int]
     ) -> Tuple[nn.Sequential, int]:
+        """
+        Returns the decode part of the network by building up a sequence of layers returned by `_get_decode_layer`.
+        """
         decode = nn.Sequential()
         layer_channels = in_channels
 
@@ -140,10 +221,13 @@ def _get_decode_module(
         return decode, layer_channels
 
     def _get_encode_layer(self, in_channels: int, out_channels: int, strides: int, is_last: bool) -> nn.Module:
-
+        """
+        Returns a single layer of the encoder part of the network.
+        """
+        mod: nn.Module
         if self.num_res_units > 0:
-            return ResidualUnit(
-                dimensions=self.dimensions,
+            mod = ResidualUnit(
+                spatial_dims=self.dimensions,
                 in_channels=in_channels,
                 out_channels=out_channels,
                 strides=strides,
@@ -155,8 +239,8 @@ def _get_encode_layer(self, in_channels: int, out_channels: int, strides: int, i
                 bias=self.bias,
                 last_conv_only=is_last,
             )
-        return Convolution(
-            dimensions=self.dimensions,
+        mod = Convolution(
+            spatial_dims=self.dimensions,
             in_channels=in_channels,
             out_channels=out_channels,
             strides=strides,
@@ -167,13 +251,16 @@ def _get_encode_layer(self, in_channels: int, out_channels: int, strides: int, i
             bias=self.bias,
             conv_only=is_last,
         )
+        return mod
 
     def _get_decode_layer(self, in_channels: int, out_channels: int, strides: int, is_last: bool) -> nn.Sequential:
-
+        """
+        Returns a single layer of the decoder part of the network.
+        """
         decode = nn.Sequential()
 
         conv = Convolution(
-            dimensions=self.dimensions,
+            spatial_dims=self.dimensions,
             in_channels=in_channels,
             out_channels=out_channels,
             strides=strides,
@@ -190,7 +277,7 @@ def _get_decode_layer(self, in_channels: int, out_channels: int, strides: int, i
 
         if self.num_res_units > 0:
             ru = ResidualUnit(
-                dimensions=self.dimensions,
+                spatial_dims=self.dimensions,
                 in_channels=out_channels,
                 out_channels=out_channels,
                 strides=1,
diff --git a/monai/networks/nets/basic_unet.py b/monai/networks/nets/basic_unet.py
index 63205f45ee..f96b299d2b 100644
--- a/monai/networks/nets/basic_unet.py
+++ b/monai/networks/nets/basic_unet.py
@@ -16,7 +16,7 @@
 
 from monai.networks.blocks import Convolution, UpSample
 from monai.networks.layers.factories import Conv, Pool
-from monai.utils import ensure_tuple_rep
+from monai.utils import deprecated_arg, ensure_tuple_rep
 
 __all__ = ["BasicUNet", "BasicUnet", "Basicunet"]
 
@@ -24,19 +24,21 @@
 class TwoConv(nn.Sequential):
     """two convolutions."""
 
+    @deprecated_arg(name="dim", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead.")
     def __init__(
         self,
-        dim: int,
+        spatial_dims: int,
         in_chns: int,
         out_chns: int,
         act: Union[str, tuple],
         norm: Union[str, tuple],
         bias: bool,
         dropout: Union[float, tuple] = 0.0,
+        dim: Optional[int] = None,
     ):
         """
         Args:
-            dim: number of spatial dimensions.
+            spatial_dims: number of spatial dimensions.
             in_chns: number of input channels.
             out_chns: number of output channels.
             act: activation type and arguments.
@@ -44,11 +46,17 @@ def __init__(
             bias: whether to have a bias term in convolution blocks.
             dropout: dropout ratio. Defaults to no dropout.
 
+        .. deprecated:: 0.6.0
+            ``dim`` is deprecated, use ``spatial_dims`` instead.
         """
         super().__init__()
 
-        conv_0 = Convolution(dim, in_chns, out_chns, act=act, norm=norm, dropout=dropout, bias=bias, padding=1)
-        conv_1 = Convolution(dim, out_chns, out_chns, act=act, norm=norm, dropout=dropout, bias=bias, padding=1)
+        if dim is not None:
+            spatial_dims = dim
+        conv_0 = Convolution(spatial_dims, in_chns, out_chns, act=act, norm=norm, dropout=dropout, bias=bias, padding=1)
+        conv_1 = Convolution(
+            spatial_dims, out_chns, out_chns, act=act, norm=norm, dropout=dropout, bias=bias, padding=1
+        )
         self.add_module("conv_0", conv_0)
         self.add_module("conv_1", conv_1)
 
@@ -56,19 +64,21 @@ def __init__(
 class Down(nn.Sequential):
     """maxpooling downsampling and two convolutions."""
 
+    @deprecated_arg(name="dim", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead.")
     def __init__(
         self,
-        dim: int,
+        spatial_dims: int,
         in_chns: int,
         out_chns: int,
         act: Union[str, tuple],
         norm: Union[str, tuple],
         bias: bool,
         dropout: Union[float, tuple] = 0.0,
+        dim: Optional[int] = None,
     ):
         """
         Args:
-            dim: number of spatial dimensions.
+            spatial_dims: number of spatial dimensions.
             in_chns: number of input channels.
             out_chns: number of output channels.
             act: activation type and arguments.
@@ -76,11 +86,14 @@ def __init__(
             bias: whether to have a bias term in convolution blocks.
             dropout: dropout ratio. Defaults to no dropout.
 
+        .. deprecated:: 0.6.0
+            ``dim`` is deprecated, use ``spatial_dims`` instead.
         """
         super().__init__()
-
-        max_pooling = Pool["MAX", dim](kernel_size=2)
-        convs = TwoConv(dim, in_chns, out_chns, act, norm, bias, dropout)
+        if dim is not None:
+            spatial_dims = dim
+        max_pooling = Pool["MAX", spatial_dims](kernel_size=2)
+        convs = TwoConv(spatial_dims, in_chns, out_chns, act, norm, bias, dropout)
         self.add_module("max_pooling", max_pooling)
         self.add_module("convs", convs)
 
@@ -88,9 +101,10 @@ def __init__(
 class UpCat(nn.Module):
     """upsampling, concatenation with the encoder feature map, two convolutions"""
 
+    @deprecated_arg(name="dim", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead.")
     def __init__(
         self,
-        dim: int,
+        spatial_dims: int,
         in_chns: int,
         cat_chns: int,
         out_chns: int,
@@ -103,10 +117,11 @@ def __init__(
         interp_mode: str = "linear",
         align_corners: Optional[bool] = True,
         halves: bool = True,
+        dim: Optional[int] = None,
     ):
         """
         Args:
-            dim: number of spatial dimensions.
+            spatial_dims: number of spatial dimensions.
             in_chns: number of input channels to be upsampled.
             cat_chns: number of channels from the decoder.
             out_chns: number of output channels.
@@ -124,14 +139,19 @@ def __init__(
                 Only used in the "nontrainable" mode.
             halves: whether to halve the number of channels during upsampling.
                 This parameter does not work on ``nontrainable`` mode if ``pre_conv`` is `None`.
+
+        .. deprecated:: 0.6.0
+            ``dim`` is deprecated, use ``spatial_dims`` instead.
         """
         super().__init__()
+        if dim is not None:
+            spatial_dims = dim
         if upsample == "nontrainable" and pre_conv is None:
             up_chns = in_chns
         else:
             up_chns = in_chns // 2 if halves else in_chns
         self.upsample = UpSample(
-            dim,
+            spatial_dims,
             in_chns,
             up_chns,
             2,
@@ -140,7 +160,7 @@ def __init__(
             interp_mode=interp_mode,
             align_corners=align_corners,
         )
-        self.convs = TwoConv(dim, cat_chns + up_chns, out_chns, act, norm, bias, dropout)
+        self.convs = TwoConv(spatial_dims, cat_chns + up_chns, out_chns, act, norm, bias, dropout)
 
     def forward(self, x: torch.Tensor, x_e: Optional[torch.Tensor]):
         """
@@ -167,9 +187,12 @@ def forward(self, x: torch.Tensor, x_e: Optional[torch.Tensor]):
 
 
 class BasicUNet(nn.Module):
+    @deprecated_arg(
+        name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead."
+    )
     def __init__(
         self,
-        dimensions: int = 3,
+        spatial_dims: int = 3,
         in_channels: int = 1,
         out_channels: int = 2,
         features: Sequence[int] = (32, 32, 64, 128, 256, 32),
@@ -178,6 +201,7 @@ def __init__(
         bias: bool = True,
         dropout: Union[float, tuple] = 0.0,
         upsample: str = "deconv",
+        dimensions: Optional[int] = None,
     ):
         """
         A UNet implementation with 1D/2D/3D supports.
@@ -189,7 +213,7 @@ def __init__(
             http://dx.doi.org/10.1038/s41592-018-0261-2
 
         Args:
-            dimensions: number of spatial dimensions. Defaults to 3 for spatial 3D inputs.
+            spatial_dims: number of spatial dimensions. Defaults to 3 for spatial 3D inputs.
             in_channels: number of input channels. Defaults to 1.
             out_channels: number of output channels. Defaults to 2.
             features: six integers as numbers of features.
@@ -207,16 +231,19 @@ def __init__(
             upsample: upsampling mode, available options are
                 ``"deconv"``, ``"pixelshuffle"``, ``"nontrainable"``.
 
+        .. deprecated:: 0.6.0
+            ``dimensions`` is deprecated, use ``spatial_dims`` instead.
+
         Examples::
 
             # for spatial 2D
-            >>> net = BasicUNet(dimensions=2, features=(64, 128, 256, 512, 1024, 128))
+            >>> net = BasicUNet(spatial_dims=2, features=(64, 128, 256, 512, 1024, 128))
 
             # for spatial 2D, with group norm
-            >>> net = BasicUNet(dimensions=2, features=(64, 128, 256, 512, 1024, 128), norm=("group", {"num_groups": 4}))
+            >>> net = BasicUNet(spatial_dims=2, features=(64, 128, 256, 512, 1024, 128), norm=("group", {"num_groups": 4}))
 
             # for spatial 3D
-            >>> net = BasicUNet(dimensions=3, features=(32, 32, 64, 128, 256, 32))
+            >>> net = BasicUNet(spatial_dims=3, features=(32, 32, 64, 128, 256, 32))
 
         See Also
 
@@ -225,22 +252,24 @@ def __init__(
 
         """
         super().__init__()
+        if dimensions is not None:
+            spatial_dims = dimensions
 
         fea = ensure_tuple_rep(features, 6)
         print(f"BasicUNet features: {fea}.")
 
-        self.conv_0 = TwoConv(dimensions, in_channels, features[0], act, norm, bias, dropout)
-        self.down_1 = Down(dimensions, fea[0], fea[1], act, norm, bias, dropout)
-        self.down_2 = Down(dimensions, fea[1], fea[2], act, norm, bias, dropout)
-        self.down_3 = Down(dimensions, fea[2], fea[3], act, norm, bias, dropout)
-        self.down_4 = Down(dimensions, fea[3], fea[4], act, norm, bias, dropout)
+        self.conv_0 = TwoConv(spatial_dims, in_channels, features[0], act, norm, bias, dropout)
+        self.down_1 = Down(spatial_dims, fea[0], fea[1], act, norm, bias, dropout)
+        self.down_2 = Down(spatial_dims, fea[1], fea[2], act, norm, bias, dropout)
+        self.down_3 = Down(spatial_dims, fea[2], fea[3], act, norm, bias, dropout)
+        self.down_4 = Down(spatial_dims, fea[3], fea[4], act, norm, bias, dropout)
 
-        self.upcat_4 = UpCat(dimensions, fea[4], fea[3], fea[3], act, norm, bias, dropout, upsample)
-        self.upcat_3 = UpCat(dimensions, fea[3], fea[2], fea[2], act, norm, bias, dropout, upsample)
-        self.upcat_2 = UpCat(dimensions, fea[2], fea[1], fea[1], act, norm, bias, dropout, upsample)
-        self.upcat_1 = UpCat(dimensions, fea[1], fea[0], fea[5], act, norm, bias, dropout, upsample, halves=False)
+        self.upcat_4 = UpCat(spatial_dims, fea[4], fea[3], fea[3], act, norm, bias, dropout, upsample)
+        self.upcat_3 = UpCat(spatial_dims, fea[3], fea[2], fea[2], act, norm, bias, dropout, upsample)
+        self.upcat_2 = UpCat(spatial_dims, fea[2], fea[1], fea[1], act, norm, bias, dropout, upsample)
+        self.upcat_1 = UpCat(spatial_dims, fea[1], fea[0], fea[5], act, norm, bias, dropout, upsample, halves=False)
 
-        self.final_conv = Conv["conv", dimensions](fea[5], out_channels, kernel_size=1)
+        self.final_conv = Conv["conv", spatial_dims](fea[5], out_channels, kernel_size=1)
 
     def forward(self, x: torch.Tensor):
         """
diff --git a/monai/networks/nets/classifier.py b/monai/networks/nets/classifier.py
index 92fee4f566..a1f913ea23 100644
--- a/monai/networks/nets/classifier.py
+++ b/monai/networks/nets/classifier.py
@@ -25,6 +25,19 @@ class Classifier(Regressor):
     Defines a classification network from Regressor by specifying the output shape as a single dimensional tensor
     with size equal to the number of classes to predict. The final activation function can also be specified, eg.
     softmax or sigmoid.
+
+    Args:
+        in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension)
+        classes: integer stating the dimension of the final output tensor
+        channels: tuple of integers stating the output channels of each convolutional layer
+        strides: tuple of integers stating the stride (downscale factor) of each convolutional layer
+        kernel_size: integer or tuple of integers stating size of convolutional kernels
+        num_res_units: integer stating number of convolutions in residual units, 0 means no residual units
+        act: name or type defining activation layers
+        norm: name or type defining normalization layers
+        dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout
+        bias: boolean stating if convolution layers should have a bias component
+        last_act: name defining the last activation layer
     """
 
     def __init__(
@@ -41,20 +54,6 @@ def __init__(
         bias: bool = True,
         last_act: Optional[str] = None,
     ) -> None:
-        """
-        Args:
-            in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension)
-            classes: integer stating the dimension of the final output tensor
-            channels: tuple of integers stating the output channels of each convolutional layer
-            strides: tuple of integers stating the stride (downscale factor) of each convolutional layer
-            kernel_size: integer or tuple of integers stating size of convolutional kernels
-            num_res_units: integer stating number of convolutions in residual units, 0 means no residual units
-            act: name or type defining activation layers
-            norm: name or type defining normalization layers
-            dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout
-            bias: boolean stating if convolution layers should have a bias component
-            last_act: name defining the last activation layer
-        """
         super().__init__(in_shape, (classes,), channels, strides, kernel_size, num_res_units, act, norm, dropout, bias)
 
         if last_act is not None:
@@ -68,6 +67,18 @@ class Discriminator(Classifier):
     """
     Defines a discriminator network from Classifier with a single output value and sigmoid activation by default. This
     is meant for use with GANs or other applications requiring a generic discriminator network.
+
+    Args:
+        in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension)
+        channels: tuple of integers stating the output channels of each convolutional layer
+        strides: tuple of integers stating the stride (downscale factor) of each convolutional layer
+        kernel_size: integer or tuple of integers stating size of convolutional kernels
+        num_res_units: integer stating number of convolutions in residual units, 0 means no residual units
+        act: name or type defining activation layers
+        norm: name or type defining normalization layers
+        dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout
+        bias: boolean stating if convolution layers should have a bias component
+        last_act: name defining the last activation layer
     """
 
     def __init__(
@@ -83,19 +94,6 @@ def __init__(
         bias: bool = True,
         last_act=Act.SIGMOID,
     ) -> None:
-        """
-        Args:
-            in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension)
-            channels: tuple of integers stating the output channels of each convolutional layer
-            strides: tuple of integers stating the stride (downscale factor) of each convolutional layer
-            kernel_size: integer or tuple of integers stating size of convolutional kernels
-            num_res_units: integer stating number of convolutions in residual units, 0 means no residual units
-            act: name or type defining activation layers
-            norm: name or type defining normalization layers
-            dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout
-            bias: boolean stating if convolution layers should have a bias component
-            last_act: name defining the last activation layer
-        """
         super().__init__(in_shape, 1, channels, strides, kernel_size, num_res_units, act, norm, dropout, bias, last_act)
 
 
@@ -104,6 +102,17 @@ class Critic(Classifier):
     Defines a critic network from Classifier with a single output value and no final activation. The final layer is
     `nn.Flatten` instead of `nn.Linear`, the final result is computed as the mean over the first dimension. This is
     meant to be used with Wasserstein GANs.
+
+    Args:
+        in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension)
+        channels: tuple of integers stating the output channels of each convolutional layer
+        strides: tuple of integers stating the stride (downscale factor) of each convolutional layer
+        kernel_size: integer or tuple of integers stating size of convolutional kernels
+        num_res_units: integer stating number of convolutions in residual units, 0 means no residual units
+        act: name or type defining activation layers
+        norm: name or type defining normalization layers
+        dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout
+        bias: boolean stating if convolution layers should have a bias component
     """
 
     def __init__(
@@ -118,18 +127,6 @@ def __init__(
         dropout: Optional[float] = 0.25,
         bias: bool = True,
     ) -> None:
-        """
-        Args:
-            in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension)
-            channels: tuple of integers stating the output channels of each convolutional layer
-            strides: tuple of integers stating the stride (downscale factor) of each convolutional layer
-            kernel_size: integer or tuple of integers stating size of convolutional kernels
-            num_res_units: integer stating number of convolutions in residual units, 0 means no residual units
-            act: name or type defining activation layers
-            norm: name or type defining normalization layers
-            dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout
-            bias: boolean stating if convolution layers should have a bias component
-        """
         super().__init__(in_shape, 1, channels, strides, kernel_size, num_res_units, act, norm, dropout, bias, None)
 
     def _get_final_layer(self, in_shape: Sequence[int]):
diff --git a/monai/networks/nets/densenet.py b/monai/networks/nets/densenet.py
index e9f3b6d33e..e6d1cc68e6 100644
--- a/monai/networks/nets/densenet.py
+++ b/monai/networks/nets/densenet.py
@@ -62,7 +62,7 @@ def __init__(
             act: activation type and arguments. Defaults to relu.
             norm: feature normalization type and arguments. Defaults to batch norm.
         """
-        super(_DenseLayer, self).__init__()
+        super().__init__()
 
         out_channels = bn_size * growth_rate
         conv_type: Callable = Conv[Conv.CONV, spatial_dims]
@@ -110,7 +110,7 @@ def __init__(
             act: activation type and arguments. Defaults to relu.
             norm: feature normalization type and arguments. Defaults to batch norm.
         """
-        super(_DenseBlock, self).__init__()
+        super().__init__()
         for i in range(layers):
             layer = _DenseLayer(spatial_dims, in_channels, growth_rate, bn_size, dropout_prob, act=act, norm=norm)
             in_channels += growth_rate
@@ -134,7 +134,7 @@ def __init__(
             act: activation type and arguments. Defaults to relu.
             norm: feature normalization type and arguments. Defaults to batch norm.
         """
-        super(_Transition, self).__init__()
+        super().__init__()
 
         conv_type: Callable = Conv[Conv.CONV, spatial_dims]
         pool_type: Callable = Pool[Pool.AVG, spatial_dims]
@@ -178,7 +178,7 @@ def __init__(
         dropout_prob: float = 0.0,
     ) -> None:
 
-        super(DenseNet, self).__init__()
+        super().__init__()
 
         conv_type: Type[Union[nn.Conv1d, nn.Conv2d, nn.Conv3d]] = Conv[Conv.CONV, spatial_dims]
         pool_type: Type[Union[nn.MaxPool1d, nn.MaxPool2d, nn.MaxPool3d]] = Pool[Pool.MAX, spatial_dims]
@@ -299,14 +299,18 @@ def __init__(
         progress: bool = True,
         **kwargs,
     ) -> None:
-        super(DenseNet121, self).__init__(
+        super().__init__(
             init_features=init_features,
             growth_rate=growth_rate,
             block_config=block_config,
             **kwargs,
         )
         if pretrained:
-            # it only worked when `spatial_dims` is 2
+            if kwargs["spatial_dims"] > 2:
+                raise NotImplementedError(
+                    "Parameter `spatial_dims` is > 2 ; currently PyTorch Hub does not"
+                    "provide pretrained models for more than two spatial dimensions."
+                )
             _load_state_dict(self, "densenet121", progress)
 
 
@@ -322,14 +326,18 @@ def __init__(
         progress: bool = True,
         **kwargs,
     ) -> None:
-        super(DenseNet169, self).__init__(
+        super().__init__(
             init_features=init_features,
             growth_rate=growth_rate,
             block_config=block_config,
             **kwargs,
         )
         if pretrained:
-            # it only worked when `spatial_dims` is 2
+            if kwargs["spatial_dims"] > 2:
+                raise NotImplementedError(
+                    "Parameter `spatial_dims` is > 2 ; currently PyTorch Hub does not"
+                    "provide pretrained models for more than two spatial dimensions."
+                )
             _load_state_dict(self, "densenet169", progress)
 
 
@@ -345,14 +353,18 @@ def __init__(
         progress: bool = True,
         **kwargs,
     ) -> None:
-        super(DenseNet201, self).__init__(
+        super().__init__(
             init_features=init_features,
             growth_rate=growth_rate,
             block_config=block_config,
             **kwargs,
         )
         if pretrained:
-            # it only worked when `spatial_dims` is 2
+            if kwargs["spatial_dims"] > 2:
+                raise NotImplementedError(
+                    "Parameter `spatial_dims` is > 2 ; currently PyTorch Hub does not"
+                    "provide pretrained models for more than two spatial dimensions."
+                )
             _load_state_dict(self, "densenet201", progress)
 
 
@@ -363,12 +375,12 @@ def __init__(
         self,
         init_features: int = 64,
         growth_rate: int = 32,
-        block_config: Sequence[int] = (6, 12, 48, 32),
+        block_config: Sequence[int] = (6, 12, 64, 48),
         pretrained: bool = False,
         progress: bool = True,
         **kwargs,
     ) -> None:
-        super(DenseNet264, self).__init__(
+        super().__init__(
             init_features=init_features,
             growth_rate=growth_rate,
             block_config=block_config,
diff --git a/monai/networks/nets/dynunet.py b/monai/networks/nets/dynunet.py
index 4af70b22c7..fb1d55d2cc 100644
--- a/monai/networks/nets/dynunet.py
+++ b/monai/networks/nets/dynunet.py
@@ -86,6 +86,7 @@ class DynUNet(nn.Module):
         strides: convolution strides for each blocks.
         upsample_kernel_size: convolution kernel size for transposed convolution layers. The values should
             equal to strides[1:].
+        dropout: dropout ratio. Defaults to no dropout.
         norm_name: feature normalization type and arguments. Defaults to ``INSTANCE``.
         deep_supervision: whether to add deep supervision head before output. Defaults to ``False``.
             If ``True``, in training mode, the forward function will output not only the last feature
@@ -115,12 +116,13 @@ def __init__(
         kernel_size: Sequence[Union[Sequence[int], int]],
         strides: Sequence[Union[Sequence[int], int]],
         upsample_kernel_size: Sequence[Union[Sequence[int], int]],
+        dropout: Optional[Union[Tuple, str, float]] = None,
         norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}),
         deep_supervision: bool = False,
         deep_supr_num: int = 1,
         res_block: bool = False,
     ):
-        super(DynUNet, self).__init__()
+        super().__init__()
         self.spatial_dims = spatial_dims
         self.in_channels = in_channels
         self.out_channels = out_channels
@@ -128,6 +130,7 @@ def __init__(
         self.strides = strides
         self.upsample_kernel_size = upsample_kernel_size
         self.norm_name = norm_name
+        self.dropout = dropout
         self.conv_block = UnetResBlock if res_block else UnetBasicBlock
         self.filters = [min(2 ** (5 + i), 320 if spatial_dims == 3 else 512) for i in range(len(strides))]
         self.input_block = self.get_input_block()
@@ -184,17 +187,17 @@ def create_skips(index, downsamples, upsamples, superheads, bottleneck):
     def check_kernel_stride(self):
         kernels, strides = self.kernel_size, self.strides
         error_msg = "length of kernel_size and strides should be the same, and no less than 3."
-        if not (len(kernels) == len(strides) and len(kernels) >= 3):
+        if len(kernels) != len(strides) or len(kernels) < 3:
             raise AssertionError(error_msg)
 
         for idx, k_i in enumerate(kernels):
             kernel, stride = k_i, strides[idx]
             if not isinstance(kernel, int):
-                error_msg = "length of kernel_size in block {} should be the same as spatial_dims.".format(idx)
+                error_msg = f"length of kernel_size in block {idx} should be the same as spatial_dims."
                 if len(kernel) != self.spatial_dims:
                     raise AssertionError(error_msg)
             if not isinstance(stride, int):
-                error_msg = "length of stride in block {} should be the same as spatial_dims.".format(idx)
+                error_msg = f"length of stride in block {idx} should be the same as spatial_dims."
                 if len(stride) != self.spatial_dims:
                     raise AssertionError(error_msg)
 
@@ -225,6 +228,7 @@ def get_input_block(self):
             self.kernel_size[0],
             self.strides[0],
             self.norm_name,
+            dropout=self.dropout,
         )
 
     def get_bottleneck(self):
@@ -235,14 +239,11 @@ def get_bottleneck(self):
             self.kernel_size[-1],
             self.strides[-1],
             self.norm_name,
+            dropout=self.dropout,
         )
 
     def get_output_block(self, idx: int):
-        return UnetOutBlock(
-            self.spatial_dims,
-            self.filters[idx],
-            self.out_channels,
-        )
+        return UnetOutBlock(self.spatial_dims, self.filters[idx], self.out_channels, dropout=self.dropout)
 
     def get_downsamples(self):
         inp, out = self.filters[:-2], self.filters[1:-1]
@@ -276,6 +277,7 @@ def get_module_list(
                     "kernel_size": kernel,
                     "stride": stride,
                     "norm_name": self.norm_name,
+                    "dropout": self.dropout,
                     "upsample_kernel_size": up_kernel,
                 }
                 layer = conv_block(**params)
@@ -289,6 +291,7 @@ def get_module_list(
                     "kernel_size": kernel,
                     "stride": stride,
                     "norm_name": self.norm_name,
+                    "dropout": self.dropout,
                 }
                 layer = conv_block(**params)
                 layers.append(layer)
diff --git a/monai/networks/nets/dynunet_v1.py b/monai/networks/nets/dynunet_v1.py
index feb05d1762..4c910157c9 100644
--- a/monai/networks/nets/dynunet_v1.py
+++ b/monai/networks/nets/dynunet_v1.py
@@ -24,7 +24,7 @@
 
 @deprecated(
     since="0.6.0",
-    removed="0.7.0",
+    removed="0.8.0",
     msg_suffix="This module is for backward compatibility purpose only. Please use `DynUNet` instead.",
 )
 class DynUNetV1(DynUNet):
@@ -38,6 +38,7 @@ class DynUNetV1(DynUNet):
         kernel_size: convolution kernel size.
         strides: convolution strides for each blocks.
         upsample_kernel_size: convolution kernel size for transposed convolution layers.
+        dropout: dropout ratio. Defaults to no dropout.
         norm_name: [``"batch"``, ``"instance"``, ``"group"``]. Defaults to "instance".
         deep_supervision: whether to add deep supervision head before output. Defaults to ``False``.
         deep_supr_num: number of feature maps that will output during deep supervision head. Defaults to 1.
@@ -57,6 +58,7 @@ def __init__(
         kernel_size: Sequence[Union[Sequence[int], int]],
         strides: Sequence[Union[Sequence[int], int]],
         upsample_kernel_size: Sequence[Union[Sequence[int], int]],
+        dropout: float = 0.0,
         norm_name: str = "instance",
         deep_supervision: bool = False,
         deep_supr_num: int = 1,
@@ -70,6 +72,7 @@ def __init__(
         self.strides = strides
         self.upsample_kernel_size = upsample_kernel_size
         self.norm_name = norm_name
+        self.dropout = dropout
         self.conv_block = _UnetResBlockV1 if res_block else _UnetBasicBlockV1  # type: ignore
         self.filters = [min(2 ** (5 + i), 320 if spatial_dims == 3 else 512) for i in range(len(strides))]
         self.input_block = self.get_input_block()
diff --git a/monai/networks/nets/efficientnet.py b/monai/networks/nets/efficientnet.py
index 453916758a..7de7e4e521 100644
--- a/monai/networks/nets/efficientnet.py
+++ b/monai/networks/nets/efficientnet.py
@@ -534,7 +534,7 @@ def __init__(
         weight_coeff, depth_coeff, image_size, dropout_rate, dropconnect_rate = efficientnet_params[model_name]
 
         # create model and initialize random weights
-        super(EfficientNetBN, self).__init__(
+        super().__init__(
             blocks_args_str=blocks_args_str,
             spatial_dims=spatial_dims,
             in_channels=in_channels,
@@ -594,7 +594,7 @@ def __init__(
         weight_coeff, depth_coeff, image_size, dropout_rate, dropconnect_rate = efficientnet_params[model_name]
 
         # create model and initialize random weights
-        super(EfficientNetBNFeatures, self).__init__(
+        super().__init__(
             blocks_args_str=blocks_args_str,
             spatial_dims=spatial_dims,
             in_channels=in_channels,
@@ -677,7 +677,7 @@ def drop_connect(inputs: torch.Tensor, p: float, training: bool) -> torch.Tensor
         output: output tensor after applying drop connection.
     """
     if p < 0.0 or p > 1.0:
-        raise ValueError("p must be in range of [0, 1], found {}".format(p))
+        raise ValueError(f"p must be in range of [0, 1], found {p}")
 
     # eval mode: drop_connect is switched off - so return input without modifying
     if not training:
@@ -708,7 +708,7 @@ def _load_state_dict(model: nn.Module, arch: str, progress: bool, adv_prop: bool
         arch = arch.split("efficientnet-")[-1] + "-ap"
     model_url = look_up_option(arch, url_map, None)
     if model_url is None:
-        print("pretrained weights of {} is not provided".format(arch))
+        print(f"pretrained weights of {arch} is not provided")
     else:
         # load state dict from url
         model_url = url_map[arch]
@@ -852,7 +852,7 @@ def _calculate_output_image_size(input_image_size: List[int], stride: Union[int,
     if isinstance(stride, tuple):
         all_strides_equal = all(stride[0] == s for s in stride)
         if not all_strides_equal:
-            raise ValueError("unequal strides are not possible, got {}".format(stride))
+            raise ValueError(f"unequal strides are not possible, got {stride}")
 
         stride = stride[0]
 
diff --git a/monai/networks/nets/fullyconnectednet.py b/monai/networks/nets/fullyconnectednet.py
index b906bab015..19197bd58d 100644
--- a/monai/networks/nets/fullyconnectednet.py
+++ b/monai/networks/nets/fullyconnectednet.py
@@ -30,9 +30,24 @@ def _get_adn_layer(
 
 class FullyConnectedNet(nn.Sequential):
     """
-    Plain full-connected layer neural network
+    Simple full-connected layer neural network composed of a sequence of linear layers with PReLU activation and
+    dropout.  The network accepts input with `in_channels` channels, has output with `out_channels` channels, and
+    hidden layer output channels given in `hidden_channels`. If `bias` is True then linear units have a bias term.
+
+    Args:
+        in_channels: number of input channels.
+        out_channels: number of output channels.
+        hidden_channels: number of output channels for each hidden layer.
+        dropout: dropout ratio. Defaults to no dropout.
+        act: activation type and arguments. Defaults to PReLU.
+        bias: whether to have a bias term in linear units. Defaults to True.
+        adn_ordering: order of operations in :py:class:`monai.networks.blocks.ADN`.
+
+    Examples::
+
+        # accepts 4 values and infers 3 values as output, has 3 hidden layers with 10, 20, 10 values as output
+        net = FullyConnectedNet(4, 3, [10, 20, 10], dropout=0.2)
 
-    The network uses dropout and, by default, PReLU activation
     """
 
     def __init__(
@@ -53,8 +68,11 @@ def __init__(
         self.in_channels = in_channels
         self.out_channels = out_channels
         self.hidden_channels = list(hidden_channels)
+        self.act = act
+        self.dropout = dropout
+        self.adn_ordering = adn_ordering
+
         self.add_module("flatten", nn.Flatten())
-        self.adn_layer = _get_adn_layer(act, dropout, adn_ordering)
 
         prev_channels = self.in_channels
         for i, c in enumerate(hidden_channels):
@@ -64,13 +82,34 @@ def __init__(
         self.add_module("output", nn.Linear(prev_channels, out_channels, bias))
 
     def _get_layer(self, in_channels: int, out_channels: int, bias: bool) -> nn.Sequential:
-        seq = nn.Sequential(nn.Linear(in_channels, out_channels, bias))
-        seq.add_module("ADN", self.adn_layer)
+        seq = nn.Sequential(
+            nn.Linear(in_channels, out_channels, bias), _get_adn_layer(self.act, self.dropout, self.adn_ordering)
+        )
         return seq
 
 
 class VarFullyConnectedNet(nn.Module):
-    """Variational fully-connected network."""
+    """
+    Variational fully-connected network. This is composed of an encode layer, reparameterization layer, and then a
+    decode layer.
+
+    Args:
+        in_channels: number of input channels.
+        out_channels: number of output channels.
+        latent_size: number of latent variables to use.
+        encode_channels: number of output channels for each hidden layer of the encode half.
+        decode_channels: number of output channels for each hidden layer of the decode half.
+        dropout: dropout ratio. Defaults to no dropout.
+        act: activation type and arguments. Defaults to PReLU.
+        bias: whether to have a bias term in linear units. Defaults to True.
+        adn_ordering: order of operations in :py:class:`monai.networks.blocks.ADN`.
+
+    Examples::
+
+        # accepts inputs with 4 values, uses a latent space of 2 variables, and produces outputs of 3 values
+        net = VarFullyConnectedNet(4, 3, 2, [5, 10], [10, 5])
+
+    """
 
     def __init__(
         self,
diff --git a/monai/networks/nets/generator.py b/monai/networks/nets/generator.py
index 1f24944a63..90aa26cd01 100644
--- a/monai/networks/nets/generator.py
+++ b/monai/networks/nets/generator.py
@@ -25,13 +25,35 @@ class Generator(nn.Module):
     """
     Defines a simple generator network accepting a latent vector and through a sequence of convolution layers
     constructs an output tensor of greater size and high dimensionality. The method `_get_layer` is used to
-    create each of these layers, override this method to define layers beyond the default Convolution or
-    ResidualUnit layers.
+    create each of these layers, override this method to define layers beyond the default
+    :py:class:`monai.networks.blocks.Convolution` or :py:class:`monai.networks.blocks.ResidualUnit` layers.
+
+    The layers are constructed using the values in the `channels` and `strides` arguments, the number being defined by
+    the length of these (which must match). Input is first passed through a :py:class:`torch.nn.Linear` layer to
+    convert the input vector to an image tensor with dimensions `start_shape`. This passes through the convolution
+    layers and is progressively upsampled if the `strides` valus are greater than 1 using transpose convolutions. The
+    size of the final output is defined by the `start_shape` dimension and the amount of upsampling done through
+    strides. In the default definition the size of the output's spatial dimensions will be that of `start_shape`
+    multiplied by the product of `strides`, thus the example network below upsamples an starting size of (64, 8, 8)
+    to (1, 64, 64) since its `strides` are (2, 2, 2).
+
+    Args:
+        latent_shape: tuple of integers stating the dimension of the input latent vector (minus batch dimension)
+        start_shape: tuple of integers stating the dimension of the tensor to pass to convolution subnetwork
+        channels: tuple of integers stating the output channels of each convolutional layer
+        strides: tuple of integers stating the stride (upscale factor) of each convolutional layer
+        kernel_size: integer or tuple of integers stating size of convolutional kernels
+        num_res_units: integer stating number of convolutions in residual units, 0 means no residual units
+        act: name or type defining activation layers
+        norm: name or type defining normalization layers
+        dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout
+        bias: boolean stating if convolution layers should have a bias component
+
+    Examples::
+
+        # 3 layers, latent input vector of shape (42, 24), output volume of shape (1, 64, 64)
+        net = Generator((42, 24), (64, 8, 8), (32, 16, 1), (2, 2, 2))
 
-    For example, a generator accepting a latent vector if shape (42,24) and producing an output volume of
-    shape (1,64,64) can be constructed as:
-
-        gen = Generator((42, 24), (64, 8, 8), (32, 16, 1), (2, 2, 2))
     """
 
     def __init__(
@@ -47,26 +69,6 @@ def __init__(
         dropout: Optional[float] = None,
         bias: bool = True,
     ) -> None:
-        """
-        Construct the generator network with the number of layers defined by `channels` and `strides`. In the
-        forward pass a `nn.Linear` layer relates the input latent vector to a tensor of dimensions `start_shape`,
-        this is then fed forward through the sequence of convolutional layers. The number of layers is defined by
-        the length of `channels` and `strides` which must match, each layer having the number of output channels
-        given in `channels` and an upsample factor given in `strides` (ie. a transpose convolution with that stride
-        size).
-
-        Args:
-            latent_shape: tuple of integers stating the dimension of the input latent vector (minus batch dimension)
-            start_shape: tuple of integers stating the dimension of the tensor to pass to convolution subnetwork
-            channels: tuple of integers stating the output channels of each convolutional layer
-            strides: tuple of integers stating the stride (upscale factor) of each convolutional layer
-            kernel_size: integer or tuple of integers stating size of convolutional kernels
-            num_res_units: integer stating number of convolutions in residual units, 0 means no residual units
-            act: name or type defining activation layers
-            norm: name or type defining normalization layers
-            dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout
-            bias: boolean stating if convolution layers should have a bias component
-        """
         super().__init__()
 
         self.in_channels, *self.start_shape = ensure_tuple(start_shape)
@@ -112,7 +114,7 @@ def _get_layer(
             strides=strides,
             is_transposed=True,
             conv_only=is_last or self.num_res_units > 0,
-            dimensions=self.dimensions,
+            spatial_dims=self.dimensions,
             out_channels=out_channels,
             kernel_size=self.kernel_size,
             act=self.act,
@@ -126,7 +128,7 @@ def _get_layer(
                 in_channels=out_channels,
                 subunits=self.num_res_units,
                 last_conv_only=is_last,
-                dimensions=self.dimensions,
+                spatial_dims=self.dimensions,
                 out_channels=out_channels,
                 kernel_size=self.kernel_size,
                 act=self.act,
diff --git a/monai/networks/nets/highresnet.py b/monai/networks/nets/highresnet.py
index f644a7835a..2937cda32a 100644
--- a/monai/networks/nets/highresnet.py
+++ b/monai/networks/nets/highresnet.py
@@ -70,7 +70,7 @@ def __init__(
             ValueError: When ``channel_matching=pad`` and ``in_channels > out_channels``. Incompatible values.
 
         """
-        super(HighResBlock, self).__init__()
+        super().__init__()
         self.chn_pad = ChannelPad(
             spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels, mode=channel_matching
         )
@@ -84,7 +84,7 @@ def __init__(
             )
             layers.append(
                 Convolution(
-                    dimensions=spatial_dims,
+                    spatial_dims=spatial_dims,
                     in_channels=_in_chns,
                     out_channels=_out_chns,
                     kernel_size=kernel_size,
@@ -146,7 +146,7 @@ def __init__(
         channel_matching: Union[ChannelMatching, str] = ChannelMatching.PAD,
     ) -> None:
 
-        super(HighResNet, self).__init__()
+        super().__init__()
         blocks = nn.ModuleList()
 
         # initial conv layer
@@ -154,7 +154,7 @@ def __init__(
         _in_chns, _out_chns = in_channels, params["n_features"]
         blocks.append(
             Convolution(
-                dimensions=spatial_dims,
+                spatial_dims=spatial_dims,
                 in_channels=_in_chns,
                 out_channels=_out_chns,
                 kernel_size=params["kernel_size"],
@@ -190,7 +190,7 @@ def __init__(
         _in_chns, _out_chns = _out_chns, params["n_features"]
         blocks.append(
             Convolution(
-                dimensions=spatial_dims,
+                spatial_dims=spatial_dims,
                 in_channels=_in_chns,
                 out_channels=_out_chns,
                 kernel_size=params["kernel_size"],
@@ -206,7 +206,7 @@ def __init__(
         _in_chns = _out_chns
         blocks.append(
             Convolution(
-                dimensions=spatial_dims,
+                spatial_dims=spatial_dims,
                 in_channels=_in_chns,
                 out_channels=out_channels,
                 kernel_size=params["kernel_size"],
diff --git a/monai/networks/nets/netadapter.py b/monai/networks/nets/netadapter.py
index 80288f7945..13da7698d3 100644
--- a/monai/networks/nets/netadapter.py
+++ b/monai/networks/nets/netadapter.py
@@ -37,6 +37,9 @@ class NetAdapter(torch.nn.Module):
         bias: the bias value when replacing the last layer. if False, the layer will not learn an additive bias,
             default to True.
 
+    .. deprecated:: 0.6.0
+        ``n_classes`` is deprecated, use ``num_classes`` instead.
+
     """
 
     @deprecated_arg("n_classes", since="0.6")
diff --git a/monai/networks/nets/regressor.py b/monai/networks/nets/regressor.py
index 25acb9bfa5..bc8feb7527 100644
--- a/monai/networks/nets/regressor.py
+++ b/monai/networks/nets/regressor.py
@@ -29,6 +29,30 @@ class Regressor(nn.Module):
     This defines a network for relating large-sized input tensors to small output tensors, ie. regressing large
     values to a prediction. An output of a single dimension can be used as value regression or multi-label
     classification prediction, an output of a single value can be used as a discriminator or critic prediction.
+
+    The network is constructed as a sequence of layers, either :py:class:`monai.networks.blocks.Convolution` or
+    :py:class:`monai.networks.blocks.ResidualUnit`, with a final fully-connected layer resizing the output from the
+    blocks to the final size. Each block is defined with a stride value typically used to downsample the input using
+    strided convolutions. In this way each block progressively condenses information from the input into a deep
+    representation the final fully-connected layer relates to a final result.
+
+    Args:
+        in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension)
+        out_shape: tuple of integers stating the dimension of the final output tensor (minus batch dimension)
+        channels: tuple of integers stating the output channels of each convolutional layer
+        strides: tuple of integers stating the stride (downscale factor) of each convolutional layer
+        kernel_size: integer or tuple of integers stating size of convolutional kernels
+        num_res_units: integer stating number of convolutions in residual units, 0 means no residual units
+        act: name or type defining activation layers
+        norm: name or type defining normalization layers
+        dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout
+        bias: boolean stating if convolution layers should have a bias component
+
+    Examples::
+
+        # infers a 2-value result (eg. a 2D cartesian coordinate) from a 64x64 image
+        net = Regressor((1, 64, 64), (2,), (2, 4, 8), (2, 2, 2))
+
     """
 
     def __init__(
@@ -44,23 +68,6 @@ def __init__(
         dropout: Optional[float] = None,
         bias: bool = True,
     ) -> None:
-        """
-        Construct the regressor network with the number of layers defined by `channels` and `strides`. Inputs are
-        first passed through the convolutional layers in the forward pass, the output from this is then pass
-        through a fully connected layer to relate them to the final output tensor.
-
-        Args:
-            in_shape: tuple of integers stating the dimension of the input tensor (minus batch dimension)
-            out_shape: tuple of integers stating the dimension of the final output tensor
-            channels: tuple of integers stating the output channels of each convolutional layer
-            strides: tuple of integers stating the stride (downscale factor) of each convolutional layer
-            kernel_size: integer or tuple of integers stating size of convolutional kernels
-            num_res_units: integer stating number of convolutions in residual units, 0 means no residual units
-            act: name or type defining activation layers
-            norm: name or type defining normalization layers
-            dropout: optional float value in range [0, 1] stating dropout probability for layers, None for no dropout
-            bias: boolean stating if convolution layers should have a bias component
-        """
         super().__init__()
 
         self.in_channels, *self.in_shape = ensure_tuple(in_shape)
@@ -107,7 +114,7 @@ def _get_layer(
             layer = ResidualUnit(
                 subunits=self.num_res_units,
                 last_conv_only=is_last,
-                dimensions=self.dimensions,
+                spatial_dims=self.dimensions,
                 in_channels=in_channels,
                 out_channels=out_channels,
                 strides=strides,
@@ -120,7 +127,7 @@ def _get_layer(
         else:
             layer = Convolution(
                 conv_only=is_last,
-                dimensions=self.dimensions,
+                spatial_dims=self.dimensions,
                 in_channels=in_channels,
                 out_channels=out_channels,
                 strides=strides,
diff --git a/monai/networks/nets/regunet.py b/monai/networks/nets/regunet.py
index 4cf747f650..b800364c67 100644
--- a/monai/networks/nets/regunet.py
+++ b/monai/networks/nets/regunet.py
@@ -67,7 +67,7 @@ def __init__(
             concat_skip: when up-sampling, concatenate skipped tensor if true, otherwise use addition
             encode_kernel_sizes: kernel size for down-sampling
         """
-        super(RegUNet, self).__init__()
+        super().__init__()
         if not extract_levels:
             extract_levels = (depth,)
         if max(extract_levels) != depth:
@@ -262,7 +262,7 @@ def __init__(
         decode_size: List[int],
         in_channels: int,
     ):
-        super(AffineHead, self).__init__()
+        super().__init__()
         self.spatial_dims = spatial_dims
         if spatial_dims == 2:
             in_features = in_channels * decode_size[0] * decode_size[1]
@@ -371,7 +371,7 @@ def __init__(
         in_channels: int,
         out_channels: int,
     ):
-        super(AdditiveUpSampleBlock, self).__init__()
+        super().__init__()
         self.deconv = get_deconv_block(spatial_dims=spatial_dims, in_channels=in_channels, out_channels=out_channels)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
diff --git a/monai/networks/nets/resnet.py b/monai/networks/nets/resnet.py
index a5e6b7ab81..1086a21e46 100644
--- a/monai/networks/nets/resnet.py
+++ b/monai/networks/nets/resnet.py
@@ -14,9 +14,10 @@
 
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 
 from monai.networks.layers.factories import Conv, Norm, Pool
+from monai.networks.layers.utils import get_pool_layer
+from monai.utils.module import look_up_option
 
 __all__ = ["ResNet", "resnet10", "resnet18", "resnet34", "resnet50", "resnet101", "resnet152", "resnet200"]
 
@@ -58,7 +59,7 @@ def __init__(
             stride: stride to use for first conv layer.
             downsample: which downsample layer to use.
         """
-        super(ResNetBlock, self).__init__()
+        super().__init__()
 
         conv_type: Callable = Conv[Conv.CONV, spatial_dims]
         norm_type: Callable = Norm[Norm.BATCH, spatial_dims]
@@ -110,7 +111,7 @@ def __init__(
             downsample: which downsample layer to use.
         """
 
-        super(ResNetBottleneck, self).__init__()
+        super().__init__()
 
         conv_type: Callable = Conv[Conv.CONV, spatial_dims]
         norm_type: Callable = Norm[Norm.BATCH, spatial_dims]
@@ -162,9 +163,15 @@ class ResNet(nn.Module):
         conv1_t_size: size of first convolution layer, determines kernel and padding.
         conv1_t_stride: stride of first convolution layer.
         no_max_pool: bool argument to determine if to use maxpool layer.
-        shortcut_type: which downsample block to use.
+        shortcut_type: which downsample block to use. Options are 'A', 'B', default to 'B'.
+            - 'A': using `self._downsample_basic_block`.
+            - 'B': kernel_size 1 conv + norm.
         widen_factor: widen output for each layer.
         num_classes: number of output (classifications)
+
+    .. deprecated:: 0.6.0
+        ``n_classes`` is deprecated, use ``num_classes`` instead.
+
     """
 
     @deprecated_arg("n_classes", since="0.6")
@@ -185,7 +192,7 @@ def __init__(
         n_classes: Optional[int] = None,
     ) -> None:
 
-        super(ResNet, self).__init__()
+        super().__init__()
         # in case the new num_classes is default but you still call deprecated n_classes
         if n_classes is not None and num_classes == 400:
             num_classes = n_classes
@@ -198,7 +205,7 @@ def __init__(
         ]
 
         block_avgpool = get_avgpool()
-        conv1_kernel, conv1_stride, con1_padding = get_conv1(conv1_t_size, conv1_t_stride)
+        conv1_kernel, conv1_stride, conv1_padding = get_conv1(conv1_t_size, conv1_t_stride)
         block_inplanes = [int(x * widen_factor) for x in block_inplanes]
 
         self.in_planes = block_inplanes[0]
@@ -209,7 +216,7 @@ def __init__(
             self.in_planes,
             kernel_size=conv1_kernel[spatial_dims],
             stride=conv1_stride[spatial_dims],
-            padding=con1_padding[spatial_dims],
+            padding=conv1_padding[spatial_dims],
             bias=False,
         )
         self.bn1 = norm_type(self.in_planes)
@@ -234,14 +241,9 @@ def __init__(
                 nn.init.constant_(torch.as_tensor(m.bias), 0)
 
     def _downsample_basic_block(self, x: torch.Tensor, planes: int, stride: int, spatial_dims: int = 3) -> torch.Tensor:
-        assert spatial_dims == 3
-        out: torch.Tensor = F.avg_pool3d(x, kernel_size=1, stride=stride)
-        zero_pads = torch.zeros(out.size(0), planes - out.size(1), out.size(2), out.size(3), out.size(4))
-        if isinstance(out.data, torch.FloatTensor):
-            zero_pads = zero_pads.cuda()
-
+        out: torch.Tensor = get_pool_layer(("avg", {"kernel_size": 1, "stride": stride}), spatial_dims=spatial_dims)(x)
+        zero_pads = torch.zeros(out.size(0), planes - out.size(1), *out.shape[2:], dtype=out.dtype, device=out.device)
         out = torch.cat([out.data, zero_pads], dim=1)
-
         return out
 
     def _make_layer(
@@ -259,9 +261,12 @@ def _make_layer(
 
         downsample: Union[nn.Module, partial, None] = None
         if stride != 1 or self.in_planes != planes * block.expansion:
-            if shortcut_type == "A":
+            if look_up_option(shortcut_type, {"A", "B"}) == "A":
                 downsample = partial(
-                    self._downsample_basic_block, planes=planes * block.expansion, kernel_size=1, stride=stride
+                    self._downsample_basic_block,
+                    planes=planes * block.expansion,
+                    stride=stride,
+                    spatial_dims=spatial_dims,
                 )
             else:
                 downsample = nn.Sequential(
@@ -269,12 +274,16 @@ def _make_layer(
                     norm_type(planes * block.expansion),
                 )
 
-        layers = []
-        layers.append(
+        layers = [
             block(
-                in_planes=self.in_planes, planes=planes, spatial_dims=spatial_dims, stride=stride, downsample=downsample
+                in_planes=self.in_planes,
+                planes=planes,
+                spatial_dims=spatial_dims,
+                stride=stride,
+                downsample=downsample,
             )
-        )
+        ]
+
         self.in_planes = planes * block.expansion
         for _i in range(1, blocks):
             layers.append(block(self.in_planes, planes, spatial_dims=spatial_dims))
diff --git a/monai/networks/nets/segresnet.py b/monai/networks/nets/segresnet.py
index 8be562aadd..cd7e89c7e0 100644
--- a/monai/networks/nets/segresnet.py
+++ b/monai/networks/nets/segresnet.py
@@ -226,7 +226,7 @@ def __init__(
         blocks_up: tuple = (1, 1, 1),
         upsample_mode: Union[UpsampleMode, str] = UpsampleMode.NONTRAINABLE,
     ):
-        super(SegResNetVAE, self).__init__(
+        super().__init__(
             spatial_dims=spatial_dims,
             init_filters=init_filters,
             in_channels=in_channels,
diff --git a/monai/networks/nets/senet.py b/monai/networks/nets/senet.py
index 9b7035c259..34582010e9 100644
--- a/monai/networks/nets/senet.py
+++ b/monai/networks/nets/senet.py
@@ -87,7 +87,7 @@ def __init__(
         num_classes: int = 1000,
     ) -> None:
 
-        super(SENet, self).__init__()
+        super().__init__()
 
         relu_type: Type[nn.ReLU] = Act[Act.RELU]
         conv_type: Type[Union[nn.Conv1d, nn.Conv2d, nn.Conv3d]] = Conv[Conv.CONV, spatial_dims]
@@ -192,7 +192,7 @@ def _make_layer(
         downsample = None
         if stride != 1 or self.inplanes != planes * block.expansion:
             downsample = Convolution(
-                dimensions=self.spatial_dims,
+                spatial_dims=self.spatial_dims,
                 in_channels=self.inplanes,
                 out_channels=planes * block.expansion,
                 strides=stride,
@@ -317,7 +317,7 @@ def __init__(
         progress: bool = True,
         **kwargs,
     ) -> None:
-        super(SENet154, self).__init__(
+        super().__init__(
             block=SEBottleneck,
             layers=layers,
             groups=groups,
@@ -345,7 +345,7 @@ def __init__(
         progress: bool = True,
         **kwargs,
     ) -> None:
-        super(SEResNet50, self).__init__(
+        super().__init__(
             block=SEResNetBottleneck,
             layers=layers,
             groups=groups,
@@ -378,7 +378,7 @@ def __init__(
         progress: bool = True,
         **kwargs,
     ) -> None:
-        super(SEResNet101, self).__init__(
+        super().__init__(
             block=SEResNetBottleneck,
             layers=layers,
             groups=groups,
@@ -410,7 +410,7 @@ def __init__(
         progress: bool = True,
         **kwargs,
     ) -> None:
-        super(SEResNet152, self).__init__(
+        super().__init__(
             block=SEResNetBottleneck,
             layers=layers,
             groups=groups,
@@ -443,7 +443,7 @@ def __init__(
         progress: bool = True,
         **kwargs,
     ) -> None:
-        super(SEResNext50, self).__init__(
+        super().__init__(
             block=SEResNeXtBottleneck,
             layers=layers,
             groups=groups,
@@ -477,7 +477,7 @@ def __init__(
         progress: bool = True,
         **kwargs,
     ) -> None:
-        super(SEResNext101, self).__init__(
+        super().__init__(
             block=SEResNeXtBottleneck,
             layers=layers,
             groups=groups,
diff --git a/monai/networks/nets/torchvision_fc.py b/monai/networks/nets/torchvision_fc.py
index 1619f877e7..d5bd6e9f57 100644
--- a/monai/networks/nets/torchvision_fc.py
+++ b/monai/networks/nets/torchvision_fc.py
@@ -73,7 +73,7 @@ def __init__(
         )
 
 
-@deprecated(since="0.6.0", removed="0.7.0", msg_suffix="Please consider using `TorchVisionFCModel` instead.")
+@deprecated(since="0.6.0", removed="0.8.0", msg_suffix="Please consider using `TorchVisionFCModel` instead.")
 class TorchVisionFullyConvModel(TorchVisionFCModel):
     """
     Customize TorchVision models to replace fully connected layer by convolutional layer.
diff --git a/monai/networks/nets/transchex.py b/monai/networks/nets/transchex.py
new file mode 100644
index 0000000000..2e161cc95f
--- /dev/null
+++ b/monai/networks/nets/transchex.py
@@ -0,0 +1,398 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import os
+import shutil
+import tarfile
+import tempfile
+from typing import Sequence, Tuple, Union
+
+import torch
+from torch import nn
+
+from monai.utils import optional_import
+
+transformers = optional_import("transformers")
+load_tf_weights_in_bert = optional_import("transformers", name="load_tf_weights_in_bert")
+cached_path = optional_import("transformers.file_utils", name="cached_path")[0]
+BertEmbeddings = optional_import("transformers.models.bert.modeling_bert", name="BertEmbeddings")[0]
+BertLayer = optional_import("transformers.models.bert.modeling_bert", name="BertLayer")[0]
+
+__all__ = [
+    "BertPreTrainedModel",
+    "BertAttention",
+    "BertOutput",
+    "BertMixedLayer",
+    "Pooler",
+    "MultiModal",
+    "Transchex",
+]
+
+
+class BertPreTrainedModel(nn.Module):
+    """Module to load BERT pre-trained weights.
+    Based on:
+    LXMERT
+    https://github.com/airsplay/lxmert
+    BERT (pytorch-transformer)
+    https://github.com/huggingface/transformers
+    """
+
+    def __init__(self, *inputs, **kwargs) -> None:
+        super().__init__()
+
+    def init_bert_weights(self, module):
+        if isinstance(module, (nn.Linear, nn.Embedding)):
+            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+        elif isinstance(module, torch.nn.LayerNorm):
+            module.bias.data.zero_()
+            module.weight.data.fill_(1.0)
+        if isinstance(module, nn.Linear) and module.bias is not None:
+            module.bias.data.zero_()
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        num_language_layers,
+        num_vision_layers,
+        num_mixed_layers,
+        bert_config,
+        state_dict=None,
+        cache_dir=None,
+        from_tf=False,
+        *inputs,
+        **kwargs,
+    ):
+        archive_file = "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz"
+        resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir)
+        tempdir = None
+        if os.path.isdir(resolved_archive_file) or from_tf:
+            serialization_dir = resolved_archive_file
+        else:
+            tempdir = tempfile.mkdtemp()
+            with tarfile.open(resolved_archive_file, "r:gz") as archive:
+                archive.extractall(tempdir)
+            serialization_dir = tempdir
+        model = cls(num_language_layers, num_vision_layers, num_mixed_layers, bert_config, *inputs, **kwargs)
+        if state_dict is None and not from_tf:
+            weights_path = os.path.join(serialization_dir, "pytorch_model.bin")
+            state_dict = torch.load(weights_path, map_location="cpu" if not torch.cuda.is_available() else None)
+        if tempdir:
+            shutil.rmtree(tempdir)
+        if from_tf:
+            weights_path = os.path.join(serialization_dir, "model.ckpt")
+            return load_tf_weights_in_bert(model, weights_path)
+        old_keys = []
+        new_keys = []
+        for key in state_dict.keys():
+            new_key = None
+            if "gamma" in key:
+                new_key = key.replace("gamma", "weight")
+            if "beta" in key:
+                new_key = key.replace("beta", "bias")
+            if new_key:
+                old_keys.append(key)
+                new_keys.append(new_key)
+        for old_key, new_key in zip(old_keys, new_keys):
+            state_dict[new_key] = state_dict.pop(old_key)
+        missing_keys = []
+        unexpected_keys = []
+        error_msgs = []
+        metadata = getattr(state_dict, "_metadata", None)
+        state_dict = state_dict.copy()
+        if metadata is not None:
+            state_dict._metadata = metadata
+
+        def load(module, prefix=""):
+            local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
+            module._load_from_state_dict(
+                state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs
+            )
+            for name, child in module._modules.items():
+                if child is not None:
+                    load(child, prefix + name + ".")
+
+        start_prefix = ""
+        if not hasattr(model, "bert") and any(s.startswith("bert.") for s in state_dict.keys()):
+            start_prefix = "bert."
+        load(model, prefix=start_prefix)
+        return model
+
+
+class BertAttention(nn.Module):
+    """BERT attention layer.
+    Based on: BERT (pytorch-transformer)
+    https://github.com/huggingface/transformers
+    """
+
+    def __init__(
+        self,
+        config,
+    ) -> None:
+        super().__init__()
+        self.num_attention_heads = config.num_attention_heads
+        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
+        self.all_head_size = self.num_attention_heads * self.attention_head_size
+        self.query = nn.Linear(config.hidden_size, self.all_head_size)
+        self.key = nn.Linear(config.hidden_size, self.all_head_size)
+        self.value = nn.Linear(config.hidden_size, self.all_head_size)
+        self.dropout = nn.Dropout(config.attention_probs_dropout_prob)
+
+    def transpose_for_scores(self, x):
+        new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
+        x = x.view(*new_x_shape)
+        return x.permute(0, 2, 1, 3)
+
+    def forward(self, hidden_states, context):
+        mixed_query_layer = self.query(hidden_states)
+        mixed_key_layer = self.key(context)
+        mixed_value_layer = self.value(context)
+        query_layer = self.transpose_for_scores(mixed_query_layer)
+        key_layer = self.transpose_for_scores(mixed_key_layer)
+        value_layer = self.transpose_for_scores(mixed_value_layer)
+        attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
+        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
+        attention_probs = self.dropout(nn.Softmax(dim=-1)(attention_scores))
+        context_layer = torch.matmul(attention_probs, value_layer)
+        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
+        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
+        context_layer = context_layer.view(*new_context_layer_shape)
+        return context_layer
+
+
+class BertOutput(nn.Module):
+    """BERT output layer.
+    Based on: BERT (pytorch-transformer)
+    https://github.com/huggingface/transformers
+    """
+
+    def __init__(self, config) -> None:
+        super().__init__()
+        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
+        self.LayerNorm = torch.nn.LayerNorm(config.hidden_size, eps=1e-12)
+        self.dropout = nn.Dropout(config.hidden_dropout_prob)
+
+    def forward(self, hidden_states, input_tensor):
+        hidden_states = self.dense(hidden_states)
+        hidden_states = self.dropout(hidden_states)
+        hidden_states = self.LayerNorm(hidden_states + input_tensor)
+        return hidden_states
+
+
+class BertMixedLayer(nn.Module):
+    """BERT cross attention layer.
+    Based on: BERT (pytorch-transformer)
+    https://github.com/huggingface/transformers
+    """
+
+    def __init__(
+        self,
+        config,
+    ) -> None:
+        super().__init__()
+        self.att = BertAttention(config)
+        self.output = BertOutput(config)
+
+    def forward(self, x, y):
+        output = self.att(x, y)
+        return self.output(output, x)
+
+
+class Pooler(nn.Module):
+    """BERT pooler layer.
+    Based on: BERT (pytorch-transformer)
+    https://github.com/huggingface/transformers
+    """
+
+    def __init__(
+        self,
+        hidden_size,
+    ) -> None:
+        super().__init__()
+        self.dense = nn.Linear(hidden_size, hidden_size)
+        self.activation = nn.Tanh()
+
+    def forward(self, hidden_states):
+        first_token_tensor = hidden_states[:, 0]
+        pooled_output = self.dense(first_token_tensor)
+        pooled_output = self.activation(pooled_output)
+        return pooled_output
+
+
+class MultiModal(BertPreTrainedModel):
+    """
+    Multimodal Transformers From Pretrained BERT Weights"
+    """
+
+    def __init__(
+        self,
+        num_language_layers: int,
+        num_vision_layers: int,
+        num_mixed_layers: int,
+        bert_config: dict,  # type: ignore
+    ) -> None:
+        """
+        Args:
+            num_language_layers: number of language transformer layers.
+            num_vision_layers: number of vision transformer layers.
+            bert_config: configuration for bert language transformer encoder.
+
+        """
+        super().__init__()
+        self.config = type("obj", (object,), bert_config)
+        self.embeddings = BertEmbeddings(self.config)
+        self.language_encoder = nn.ModuleList([BertLayer(self.config) for _ in range(num_language_layers)])
+        self.vision_encoder = nn.ModuleList([BertLayer(self.config) for _ in range(num_vision_layers)])
+        self.mixed_encoder = nn.ModuleList([BertMixedLayer(self.config) for _ in range(num_mixed_layers)])
+        self.apply(self.init_bert_weights)
+
+    def forward(self, input_ids, token_type_ids=None, vision_feats=None, attention_mask=None):
+        language_features = self.embeddings(input_ids, token_type_ids)
+        for layer in self.vision_encoder:
+            hidden_state_vision = layer(vision_feats, None)[0]
+        for layer in self.language_encoder:
+            hidden_state_language = layer(language_features, attention_mask)[0]
+        for layer in self.mixed_encoder:
+            hidden_state_mixed = layer(hidden_state_language, hidden_state_vision)
+        return hidden_state_mixed
+
+
+class Transchex(torch.nn.Module):
+    """
+    TransChex based on: "Hatamizadeh et al.,TransCheX: Self-Supervised Pretraining of Vision-Language
+    Transformers for Chest X-ray Analysis"
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        img_size: Union[Sequence[int], int],  # type: ignore
+        patch_size: Union[int, Tuple[int, int]],  # type: ignore
+        num_classes: int,
+        num_language_layers: int,
+        num_vision_layers: int,
+        num_mixed_layers: int,
+        hidden_size: int = 768,
+        drop_out: float = 0.0,
+        attention_probs_dropout_prob: float = 0.1,
+        gradient_checkpointing: bool = False,
+        hidden_act: str = "gelu",
+        hidden_dropout_prob: float = 0.1,
+        initializer_range: float = 0.02,
+        intermediate_size: int = 3072,
+        layer_norm_eps: float = 1e-12,
+        max_position_embeddings: int = 512,
+        model_type: str = "bert",
+        num_attention_heads: int = 12,
+        num_hidden_layers: int = 12,
+        pad_token_id: int = 0,
+        position_embedding_type: str = "absolute",
+        transformers_version: str = "4.10.2",
+        type_vocab_size: int = 2,
+        use_cache: bool = True,
+        vocab_size: int = 30522,
+        chunk_size_feed_forward: int = 0,
+        is_decoder: bool = False,
+        add_cross_attention: bool = False,
+    ) -> None:
+        """
+        Args:
+            in_channels: dimension of input channels.
+            img_size: dimension of input image.
+            patch_size: dimension of patch size.
+            num_classes: number of classes if classification is used.
+            num_language_layers: number of language transformer layers.
+            num_vision_layers: number of vision transformer layers.
+            num_mixed_layers: number of mixed transformer layers.
+            drop_out: faction of the input units to drop.
+            bert_config: configuration for bert language transformer encoder.
+
+        Examples:
+
+        .. code-block:: python
+
+            # for 3-channel with image size of (224,224), patch size of (32,32), 3 classes, 2 language layers,
+            # 2 vision layers, 2 mixed modality layers and dropout of 0.2 in the classification head
+            net = Transchex(in_channels=3,
+                                 img_size=(224, 224),
+                                 num_classes=3,
+                                 num_language_layers=2,
+                                 num_vision_layers=2,
+                                 num_mixed_layers=2,
+                                 drop_out=0.2)
+
+        """
+        super().__init__()
+        bert_config = {
+            "attention_probs_dropout_prob": attention_probs_dropout_prob,
+            "classifier_dropout": None,
+            "gradient_checkpointing": gradient_checkpointing,
+            "hidden_act": hidden_act,
+            "hidden_dropout_prob": hidden_dropout_prob,
+            "hidden_size": hidden_size,
+            "initializer_range": initializer_range,
+            "intermediate_size": intermediate_size,
+            "layer_norm_eps": layer_norm_eps,
+            "max_position_embeddings": max_position_embeddings,
+            "model_type": model_type,
+            "num_attention_heads": num_attention_heads,
+            "num_hidden_layers": num_hidden_layers,
+            "pad_token_id": pad_token_id,
+            "position_embedding_type": position_embedding_type,
+            "transformers_version": transformers_version,
+            "type_vocab_size": type_vocab_size,
+            "use_cache": use_cache,
+            "vocab_size": vocab_size,
+            "chunk_size_feed_forward": chunk_size_feed_forward,
+            "is_decoder": is_decoder,
+            "add_cross_attention": add_cross_attention,
+        }
+        if not (0 <= drop_out <= 1):
+            raise ValueError("dropout_rate should be between 0 and 1.")
+
+        if (img_size[0] % patch_size[0] != 0) or (img_size[1] % patch_size[1] != 0):  # type: ignore
+            raise ValueError("img_size should be divisible by patch_size.")
+
+        self.multimodal = MultiModal.from_pretrained(
+            num_language_layers=num_language_layers,
+            num_vision_layers=num_vision_layers,
+            num_mixed_layers=num_mixed_layers,
+            bert_config=bert_config,
+        )
+
+        self.patch_size = patch_size
+        self.num_patches = (img_size[0] // self.patch_size[0]) * (img_size[1] // self.patch_size[1])  # type: ignore
+        self.vision_proj = nn.Conv2d(
+            in_channels=in_channels,
+            out_channels=hidden_size,
+            kernel_size=self.patch_size,  # type: ignore
+            stride=self.patch_size,  # type: ignore
+        )
+        self.norm_vision_pos = nn.LayerNorm(hidden_size)
+        self.pos_embed_vis = nn.Parameter(torch.zeros(1, self.num_patches, hidden_size))
+        self.pooler = Pooler(hidden_size=hidden_size)
+        self.drop = torch.nn.Dropout(drop_out)
+        self.cls_head = torch.nn.Linear(hidden_size, num_classes)
+
+    def forward(self, input_ids, token_type_ids=None, vision_feats=None):
+        attention_mask = torch.ones_like(input_ids).unsqueeze(1).unsqueeze(2)
+        attention_mask = attention_mask.to(dtype=next(self.parameters()).dtype)
+        attention_mask = (1.0 - attention_mask) * -10000.0
+        vision_feats = self.vision_proj(vision_feats).flatten(2).transpose(1, 2)
+        vision_feats = self.norm_vision_pos(vision_feats)
+        vision_feats = vision_feats + self.pos_embed_vis
+        hidden_state_mixed = self.multimodal(
+            input_ids=input_ids, token_type_ids=token_type_ids, vision_feats=vision_feats, attention_mask=attention_mask
+        )
+        pooled_features = self.pooler(hidden_state_mixed)
+        logits = self.cls_head(self.drop(pooled_features))
+        return logits
diff --git a/monai/networks/nets/unet.py b/monai/networks/nets/unet.py
index 70cc816fe9..7d5f979330 100644
--- a/monai/networks/nets/unet.py
+++ b/monai/networks/nets/unet.py
@@ -10,7 +10,7 @@
 # limitations under the License.
 
 import warnings
-from typing import Sequence, Tuple, Union
+from typing import Optional, Sequence, Tuple, Union
 
 import torch
 import torch.nn as nn
@@ -18,7 +18,7 @@
 from monai.networks.blocks.convolutions import Convolution, ResidualUnit
 from monai.networks.layers.factories import Act, Norm
 from monai.networks.layers.simplelayers import SkipConnection
-from monai.utils import alias, export
+from monai.utils import alias, deprecated_arg, export
 
 __all__ = ["UNet", "Unet", "unet"]
 
@@ -26,9 +26,91 @@
 @export("monai.networks.nets")
 @alias("Unet")
 class UNet(nn.Module):
+    """
+    Enhanced version of UNet which has residual units implemented with the ResidualUnit class.
+    The residual part uses a convolution to change the input dimensions to match the output dimensions
+    if this is necessary but will use nn.Identity if not.
+    Refer to: https://link.springer.com/chapter/10.1007/978-3-030-12029-0_40.
+
+    Each layer of the network has a encode and decode path with a skip connection between them. Data in the encode path
+    is downsampled using strided convolutions (if `strides` is given values greater than 1) and in the decode path
+    upsampled using strided transpose convolutions. These down or up sampling operations occur at the beginning of each
+    block rather than afterwards as is typical in UNet implementations.
+
+    To further explain this consider the first example network given below. This network has 3 layers with strides
+    of 2 for each of the middle layers (the last layer is the bottom connection which does not down/up sample). Input
+    data to this network is immediately reduced in the spatial dimensions by a factor of 2 by the first convolution of
+    the residual unit defining the first layer of the encode part. The last layer of the decode part will upsample its
+    input (data from the previous layer concatenated with data from the skip connection) in the first convolution. this
+    ensures the final output of the network has the same shape as the input.
+
+    Padding values for the convolutions are chosen to ensure output sizes are even divisors/multiples of the input
+    sizes if the `strides` value for a layer is a factor of the input sizes. A typical case is to use `strides` values
+    of 2 and inputs that are multiples of powers of 2. An input can thus be downsampled evenly however many times its
+    dimensions can be divided by 2, so for the example network inputs would have to have dimensions that are mutliples
+    of 4. In the second example network given below the input to the bottom layer will have shape (1, 64, 15, 15) for
+    an input of shape (1, 1, 240, 240) demonstrating the input being reduced in size spatially by 2**4.
+
+    Args:
+        spatial_dims: number of spatial dimensions.
+        in_channels: number of input channels.
+        out_channels: number of output channels.
+        channels: sequence of channels. Top block first. The length of `channels` should be no less than 2.
+        strides: sequence of convolution strides. The length of `stride` should equal to `len(channels) - 1`.
+        kernel_size: convolution kernel size, the value(s) should be odd. If sequence,
+            its length should equal to dimensions. Defaults to 3.
+        up_kernel_size: upsampling convolution kernel size, the value(s) should be odd. If sequence,
+            its length should equal to dimensions. Defaults to 3.
+        num_res_units: number of residual units. Defaults to 0.
+        act: activation type and arguments. Defaults to PReLU.
+        norm: feature normalization type and arguments. Defaults to instance norm.
+        dropout: dropout ratio. Defaults to no dropout.
+        bias: whether to have a bias term in convolution blocks. Defaults to True.
+            According to `Performance Tuning Guide <https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html>`_,
+            if a conv layer is directly followed by a batch norm layer, bias should be False.
+
+    Examples::
+
+        from monai.networks.nets import UNet
+
+        # 3 layer network with down/upsampling by a factor of 2 at each layer with 2-convolution residual units
+        net = UNet(
+            spatial_dims=2,
+            in_channels=1,
+            out_channels=1,
+            channels=(4, 8, 16),
+            strides=(2, 2),
+            num_res_units=2
+        )
+
+        # 5 layer network with simple convolution/normalization/dropout/activation blocks defining the layers
+        net=UNet(
+            spatial_dims=2,
+            in_channels=1,
+            out_channels=1,
+            channels=(4, 8, 16, 32, 64),
+            strides=(2, 2, 2, 2),
+        )
+
+    .. deprecated:: 0.6.0
+        ``dimensions`` is deprecated, use ``spatial_dims`` instead.
+
+    Note: The acceptable spatial size of input data depends on the parameters of the network,
+        to set appropriate spatial size, please check the tutorial for more details:
+        https://github.com/Project-MONAI/tutorials/blob/master/modules/UNet_input_size_constrains.ipynb.
+        Typically, when using a stride of 2 in down / up sampling, the output dimensions are either half of the
+        input when downsampling, or twice when upsampling. In this case with N numbers of layers in the network,
+        the inputs must have spatial dimensions that are all multiples of 2^N.
+        Usually, applying `resize`, `pad` or `crop` transforms can help adjust the spatial size of input data.
+
+    """
+
+    @deprecated_arg(
+        name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead."
+    )
     def __init__(
         self,
-        dimensions: int,
+        spatial_dims: int,
         in_channels: int,
         out_channels: int,
         channels: Sequence[int],
@@ -40,40 +122,9 @@ def __init__(
         norm: Union[Tuple, str] = Norm.INSTANCE,
         dropout: float = 0.0,
         bias: bool = True,
+        dimensions: Optional[int] = None,
     ) -> None:
-        """
-        Enhanced version of UNet which has residual units implemented with the ResidualUnit class.
-        The residual part uses a convolution to change the input dimensions to match the output dimensions
-        if this is necessary but will use nn.Identity if not.
-        Refer to: https://link.springer.com/chapter/10.1007/978-3-030-12029-0_40.
-
-        Args:
-            dimensions: number of spatial dimensions.
-            in_channels: number of input channels.
-            out_channels: number of output channels.
-            channels: sequence of channels. Top block first. The length of `channels` should be no less than 2.
-            strides: sequence of convolution strides. The length of `stride` should equal to `len(channels) - 1`.
-            kernel_size: convolution kernel size, the value(s) should be odd. If sequence,
-                its length should equal to dimensions. Defaults to 3.
-            up_kernel_size: upsampling convolution kernel size, the value(s) should be odd. If sequence,
-                its length should equal to dimensions. Defaults to 3.
-            num_res_units: number of residual units. Defaults to 0.
-            act: activation type and arguments. Defaults to PReLU.
-            norm: feature normalization type and arguments. Defaults to instance norm.
-            dropout: dropout ratio. Defaults to no dropout.
-            bias: whether to have a bias term in convolution blocks. Defaults to True.
-                According to `Performance Tuning Guide <https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html>`_,
-                if a conv layer is directly followed by a batch norm layer, bias should be False.
-
-        Note: The acceptable spatial size of input data depends on the parameters of the network,
-            to set appropriate spatial size, please check the tutorial for more details:
-            https://github.com/Project-MONAI/tutorials/blob/master/modules/UNet_input_size_constrains.ipynb.
-            Typically, when using a stride of 2 in down / up sampling, the output dimensions are either half of the
-            input when downsampling, or twice when upsampling. In this case with N numbers of layers in the network,
-            the inputs must have spatial dimensions that are all multiples of 2^N.
-            Usually, applying `resize`, `pad` or `crop` transforms can help adjust the spatial size of input data.
 
-        """
         super().__init__()
 
         if len(channels) < 2:
@@ -83,14 +134,16 @@ def __init__(
             raise ValueError("the length of `strides` should equal to `len(channels) - 1`.")
         if delta > 0:
             warnings.warn(f"`len(strides) > len(channels) - 1`, the last {delta} values of strides will not be used.")
+        if dimensions is not None:
+            spatial_dims = dimensions
         if isinstance(kernel_size, Sequence):
-            if len(kernel_size) != dimensions:
+            if len(kernel_size) != spatial_dims:
                 raise ValueError("the length of `kernel_size` should equal to `dimensions`.")
         if isinstance(up_kernel_size, Sequence):
-            if len(up_kernel_size) != dimensions:
+            if len(up_kernel_size) != spatial_dims:
                 raise ValueError("the length of `up_kernel_size` should equal to `dimensions`.")
 
-        self.dimensions = dimensions
+        self.dimensions = spatial_dims
         self.in_channels = in_channels
         self.out_channels = out_channels
         self.channels = channels
@@ -145,8 +198,10 @@ def _get_down_layer(self, in_channels: int, out_channels: int, strides: int, is_
             strides: convolution stride.
             is_top: True if this is the top block.
         """
+        mod: nn.Module
         if self.num_res_units > 0:
-            return ResidualUnit(
+
+            mod = ResidualUnit(
                 self.dimensions,
                 in_channels,
                 out_channels,
@@ -158,7 +213,8 @@ def _get_down_layer(self, in_channels: int, out_channels: int, strides: int, is_
                 dropout=self.dropout,
                 bias=self.bias,
             )
-        return Convolution(
+            return mod
+        mod = Convolution(
             self.dimensions,
             in_channels,
             out_channels,
@@ -169,6 +225,7 @@ def _get_down_layer(self, in_channels: int, out_channels: int, strides: int, is_
             dropout=self.dropout,
             bias=self.bias,
         )
+        return mod
 
     def _get_bottom_layer(self, in_channels: int, out_channels: int) -> nn.Module:
         """
diff --git a/monai/networks/nets/unetr.py b/monai/networks/nets/unetr.py
index 9990cb6643..b75bc15892 100644
--- a/monai/networks/nets/unetr.py
+++ b/monai/networks/nets/unetr.py
@@ -70,7 +70,7 @@ def __init__(
 
         """
 
-        super(UNETR, self).__init__()
+        super().__init__()
 
         if not (0 <= dropout_rate <= 1):
             raise ValueError("dropout_rate should be between 0 and 1.")
diff --git a/monai/networks/nets/varautoencoder.py b/monai/networks/nets/varautoencoder.py
index 7f54890992..b4ef8be93d 100644
--- a/monai/networks/nets/varautoencoder.py
+++ b/monai/networks/nets/varautoencoder.py
@@ -19,16 +19,65 @@
 from monai.networks.layers.convutils import calculate_out_shape, same_padding
 from monai.networks.layers.factories import Act, Norm
 from monai.networks.nets import AutoEncoder
+from monai.utils import deprecated_arg
 
 __all__ = ["VarAutoEncoder"]
 
 
 class VarAutoEncoder(AutoEncoder):
-    """Variational Autoencoder based on the paper - https://arxiv.org/abs/1312.6114"""
+    """
+    Variational Autoencoder based on the paper - https://arxiv.org/abs/1312.6114
+
+    Args:
+        spatial_dims: number of spatial dimensions.
+        in_shape: shape of input data starting with channel dimension.
+        out_channels: number of output channels.
+        latent_size: size of the latent variable.
+        channels: sequence of channels. Top block first. The length of `channels` should be no less than 2.
+        strides: sequence of convolution strides. The length of `stride` should equal to `len(channels) - 1`.
+        kernel_size: convolution kernel size, the value(s) should be odd. If sequence,
+            its length should equal to dimensions. Defaults to 3.
+        up_kernel_size: upsampling convolution kernel size, the value(s) should be odd. If sequence,
+            its length should equal to dimensions. Defaults to 3.
+        num_res_units: number of residual units. Defaults to 0.
+        inter_channels: sequence of channels defining the blocks in the intermediate layer between encode and decode.
+        inter_dilations: defines the dilation value for each block of the intermediate layer. Defaults to 1.
+        num_inter_units: number of residual units for each block of the intermediate layer. Defaults to 0.
+        act: activation type and arguments. Defaults to PReLU.
+        norm: feature normalization type and arguments. Defaults to instance norm.
+        dropout: dropout ratio. Defaults to no dropout.
+        bias: whether to have a bias term in convolution blocks. Defaults to True.
+            According to `Performance Tuning Guide <https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html>`_,
+            if a conv layer is directly followed by a batch norm layer, bias should be False.
+
+    .. deprecated:: 0.6.0
+        ``dimensions`` is deprecated, use ``spatial_dims`` instead.
+
+    Examples::
+
+        from monai.networks.nets import VarAutoEncoder
+
+        # 3 layer network accepting images with dimensions (1, 32, 32) and using a latent vector with 2 values
+        model = VarAutoEncoder(
+            dimensions=2,
+            in_shape=(32, 32),  # image spatial shape
+            out_channels=1,
+            latent_size=2,
+            channels=(16, 32, 64),
+            strides=(1, 2, 2),
+        )
+
+    see also:
+        - Variational autoencoder network with MedNIST Dataset
+          https://github.com/Project-MONAI/tutorials/blob/master/modules/varautoencoder_mednist.ipynb
+    """
 
+    @deprecated_arg(
+        name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead."
+    )
     def __init__(
         self,
-        dimensions: int,
+        spatial_dims: int,
         in_shape: Sequence[int],
         out_channels: int,
         latent_size: int,
@@ -44,15 +93,18 @@ def __init__(
         norm: Union[Tuple, str] = Norm.INSTANCE,
         dropout: Optional[Union[Tuple, str, float]] = None,
         bias: bool = True,
+        dimensions: Optional[int] = None,
     ) -> None:
 
         self.in_channels, *self.in_shape = in_shape
 
         self.latent_size = latent_size
         self.final_size = np.asarray(self.in_shape, dtype=int)
+        if dimensions is not None:
+            spatial_dims = dimensions
 
         super().__init__(
-            dimensions,
+            spatial_dims,
             self.in_channels,
             out_channels,
             channels,
diff --git a/monai/networks/nets/vit.py b/monai/networks/nets/vit.py
index 3a5d94cc37..2707e5ad1d 100644
--- a/monai/networks/nets/vit.py
+++ b/monai/networks/nets/vit.py
@@ -18,6 +18,8 @@
 from monai.networks.blocks.patchembedding import PatchEmbeddingBlock
 from monai.networks.blocks.transformerblock import TransformerBlock
 
+__all__ = ["ViT"]
+
 
 class ViT(nn.Module):
     """
@@ -68,7 +70,7 @@ def __init__(
 
         """
 
-        super(ViT, self).__init__()
+        super().__init__()
 
         if not (0 <= dropout_rate <= 1):
             raise ValueError("dropout_rate should be between 0 and 1.")
diff --git a/monai/networks/nets/vnet.py b/monai/networks/nets/vnet.py
index 72f3290a89..1b1d3bfba7 100644
--- a/monai/networks/nets/vnet.py
+++ b/monai/networks/nets/vnet.py
@@ -30,11 +30,11 @@ def get_acti_layer(act: Union[Tuple[str, Dict], str], nchan: int = 0):
 
 class LUConv(nn.Module):
     def __init__(self, spatial_dims: int, nchan: int, act: Union[Tuple[str, Dict], str], bias: bool = False):
-        super(LUConv, self).__init__()
+        super().__init__()
 
         self.act_function = get_acti_layer(act, nchan)
         self.conv_block = Convolution(
-            dimensions=spatial_dims,
+            spatial_dims=spatial_dims,
             in_channels=nchan,
             out_channels=nchan,
             kernel_size=5,
@@ -65,7 +65,7 @@ def __init__(
         act: Union[Tuple[str, Dict], str],
         bias: bool = False,
     ):
-        super(InputTransition, self).__init__()
+        super().__init__()
 
         if 16 % in_channels != 0:
             raise ValueError(f"16 should be divisible by in_channels, got in_channels={in_channels}.")
@@ -74,7 +74,7 @@ def __init__(
         self.in_channels = in_channels
         self.act_function = get_acti_layer(act, 16)
         self.conv_block = Convolution(
-            dimensions=spatial_dims,
+            spatial_dims=spatial_dims,
             in_channels=in_channels,
             out_channels=16,
             kernel_size=5,
@@ -102,7 +102,7 @@ def __init__(
         dropout_dim: int = 3,
         bias: bool = False,
     ):
-        super(DownTransition, self).__init__()
+        super().__init__()
 
         conv_type: Type[Union[nn.Conv2d, nn.Conv3d]] = Conv[Conv.CONV, spatial_dims]
         norm_type: Type[Union[nn.BatchNorm2d, nn.BatchNorm3d]] = Norm[Norm.BATCH, spatial_dims]
@@ -138,7 +138,7 @@ def __init__(
         dropout_prob: Optional[float] = None,
         dropout_dim: int = 3,
     ):
-        super(UpTransition, self).__init__()
+        super().__init__()
 
         conv_trans_type: Type[Union[nn.ConvTranspose2d, nn.ConvTranspose3d]] = Conv[Conv.CONVTRANS, spatial_dims]
         norm_type: Type[Union[nn.BatchNorm2d, nn.BatchNorm3d]] = Norm[Norm.BATCH, spatial_dims]
@@ -174,13 +174,13 @@ def __init__(
         act: Union[Tuple[str, Dict], str],
         bias: bool = False,
     ):
-        super(OutputTransition, self).__init__()
+        super().__init__()
 
         conv_type: Type[Union[nn.Conv2d, nn.Conv3d]] = Conv[Conv.CONV, spatial_dims]
 
         self.act_function1 = get_acti_layer(act, out_channels)
         self.conv_block = Convolution(
-            dimensions=spatial_dims,
+            spatial_dims=spatial_dims,
             in_channels=in_channels,
             out_channels=out_channels,
             kernel_size=5,
diff --git a/monai/networks/utils.py b/monai/networks/utils.py
index 9d20d2a83b..8ba32f1871 100644
--- a/monai/networks/utils.py
+++ b/monai/networks/utils.py
@@ -20,6 +20,8 @@
 import torch
 import torch.nn as nn
 
+from monai.utils.deprecated import deprecated_arg
+
 __all__ = [
     "one_hot",
     "slice_channels",
@@ -225,9 +227,14 @@ def icnr_init(conv, upsample_factor, init=nn.init.kaiming_normal_):
     conv.weight.data.copy_(kernel)
 
 
-def pixelshuffle(x: torch.Tensor, dimensions: int, scale_factor: int) -> torch.Tensor:
+@deprecated_arg(
+    name="dimensions", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead."
+)
+def pixelshuffle(
+    x: torch.Tensor, spatial_dims: int, scale_factor: int, dimensions: Optional[int] = None
+) -> torch.Tensor:
     """
-    Apply pixel shuffle to the tensor `x` with spatial dimensions `dimensions` and scaling factor `scale_factor`.
+    Apply pixel shuffle to the tensor `x` with spatial dimensions `spatial_dims` and scaling factor `scale_factor`.
 
     See: Shi et al., 2016, "Real-Time Single Image and Video Super-Resolution
     Using a nEfficient Sub-Pixel Convolutional Neural Network."
@@ -236,17 +243,21 @@ def pixelshuffle(x: torch.Tensor, dimensions: int, scale_factor: int) -> torch.T
 
     Args:
         x: Input tensor
-        dimensions: number of spatial dimensions, typically 2 or 3 for 2D or 3D
+        spatial_dims: number of spatial dimensions, typically 2 or 3 for 2D or 3D
         scale_factor: factor to rescale the spatial dimensions by, must be >=1
 
+    .. deprecated:: 0.6.0
+        ``dimensions`` is deprecated, use ``spatial_dims`` instead.
+
     Returns:
         Reshuffled version of `x`.
 
     Raises:
-        ValueError: When input channels of `x` are not divisible by (scale_factor ** dimensions)
+        ValueError: When input channels of `x` are not divisible by (scale_factor ** spatial_dims)
     """
-
-    dim, factor = dimensions, scale_factor
+    if dimensions is not None:
+        spatial_dims = dimensions
+    dim, factor = spatial_dims, scale_factor
     input_size = list(x.size())
     batch_size, channels = input_size[:2]
     scale_divisor = factor ** dim
diff --git a/monai/optimizers/lr_finder.py b/monai/optimizers/lr_finder.py
index 49d4427b3d..08395afb53 100644
--- a/monai/optimizers/lr_finder.py
+++ b/monai/optimizers/lr_finder.py
@@ -120,7 +120,7 @@ def __iter__(self):
 
     def __next__(self):
         self.run_counter += 1
-        return super(ValDataLoaderIter, self).__next__()
+        return super().__next__()
 
 
 def default_image_extractor(x: Any) -> torch.Tensor:
diff --git a/monai/optimizers/lr_scheduler.py b/monai/optimizers/lr_scheduler.py
index 9416b583f7..5ad52c5286 100644
--- a/monai/optimizers/lr_scheduler.py
+++ b/monai/optimizers/lr_scheduler.py
@@ -33,7 +33,7 @@ def __init__(self, optimizer: Optimizer, end_lr: float, num_iter: int, last_epoc
         """
         self.end_lr = end_lr
         self.num_iter = num_iter
-        super(_LRSchedulerMONAI, self).__init__(optimizer, last_epoch)
+        super().__init__(optimizer, last_epoch)
 
 
 class LinearLR(_LRSchedulerMONAI):
@@ -77,7 +77,7 @@ def __init__(
         self.warmup_steps = warmup_steps
         self.t_total = t_total
         self.cycles = cycles
-        super(WarmupCosineSchedule, self).__init__(optimizer, self.lr_lambda, last_epoch)
+        super().__init__(optimizer, self.lr_lambda, last_epoch)
 
     def lr_lambda(self, step):
         if step < self.warmup_steps:
diff --git a/monai/optimizers/novograd.py b/monai/optimizers/novograd.py
index 62e42cc9ab..6ca011d9d4 100644
--- a/monai/optimizers/novograd.py
+++ b/monai/optimizers/novograd.py
@@ -45,15 +45,15 @@ def __init__(
         amsgrad: bool = False,
     ):
         if 0.0 > lr:
-            raise ValueError("Invalid learning rate: {}".format(lr))
+            raise ValueError(f"Invalid learning rate: {lr}")
         if 0.0 > eps:
-            raise ValueError("Invalid epsilon value: {}".format(eps))
+            raise ValueError(f"Invalid epsilon value: {eps}")
         if not 0.0 <= betas[0] < 1.0:
-            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
+            raise ValueError(f"Invalid beta parameter at index 0: {betas[0]}")
         if not 0.0 <= betas[1] < 1.0:
-            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
+            raise ValueError(f"Invalid beta parameter at index 1: {betas[1]}")
         if 0.0 > weight_decay:
-            raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
+            raise ValueError(f"Invalid weight_decay value: {weight_decay}")
         defaults = dict(
             lr=lr,
             betas=betas,
@@ -63,10 +63,10 @@ def __init__(
             amsgrad=amsgrad,
         )
 
-        super(Novograd, self).__init__(params, defaults)
+        super().__init__(params, defaults)
 
     def __setstate__(self, state):
-        super(Novograd, self).__setstate__(state)
+        super().__setstate__(state)
         for group in self.param_groups:
             group.setdefault("amsgrad", False)
 
diff --git a/monai/optimizers/utils.py b/monai/optimizers/utils.py
index c52ab07a04..08949912d7 100644
--- a/monai/optimizers/utils.py
+++ b/monai/optimizers/utils.py
@@ -47,7 +47,7 @@ def generate_param_groups(
 
     .. code-block:: python
 
-        net = Unet(dimensions=3, in_channels=1, out_channels=3, channels=[2, 2, 2], strides=[1, 1, 1])
+        net = Unet(spatial_dims=3, in_channels=1, out_channels=3, channels=[2, 2, 2], strides=[1, 1, 1])
         print(net)  # print out network components to select expected items
         print(net.named_parameters())  # print out all the named parameters to filter out expected items
         params = generate_param_groups(
diff --git a/monai/transforms/__init__.py b/monai/transforms/__init__.py
index 2ea7e3aa63..826f66a34c 100644
--- a/monai/transforms/__init__.py
+++ b/monai/transforms/__init__.py
@@ -18,6 +18,7 @@
     CenterSpatialCrop,
     CropForeground,
     DivisiblePad,
+    Pad,
     RandCropByLabelClasses,
     RandCropByPosNegLabel,
     RandScaleCrop,
@@ -48,7 +49,7 @@
     DivisiblePadd,
     DivisiblePadD,
     DivisiblePadDict,
-    NumpyPadModeSequence,
+    PadModeSequence,
     RandCropByLabelClassesd,
     RandCropByLabelClassesD,
     RandCropByLabelClassesDict,
@@ -85,12 +86,13 @@
     GibbsNoise,
     HistogramNormalize,
     KSpaceSpikeNoise,
-    LocalPatchShuffling,
     MaskIntensity,
     NormalizeIntensity,
     RandAdjustContrast,
     RandBiasField,
     RandCoarseDropout,
+    RandCoarseShuffle,
+    RandCoarseTransform,
     RandGaussianNoise,
     RandGaussianSharpen,
     RandGaussianSmooth,
@@ -143,6 +145,9 @@
     RandCoarseDropoutd,
     RandCoarseDropoutD,
     RandCoarseDropoutDict,
+    RandCoarseShuffled,
+    RandCoarseShuffleD,
+    RandCoarseShuffleDict,
     RandGaussianNoised,
     RandGaussianNoiseD,
     RandGaussianNoiseDict,
@@ -354,6 +359,7 @@
     CastToType,
     ClassesToIndices,
     ConvertToMultiChannelBasedOnBratsClasses,
+    CuCIM,
     DataStats,
     EnsureChannelFirst,
     EnsureType,
@@ -363,6 +369,7 @@
     LabelToMask,
     Lambda,
     MapLabelValue,
+    RandCuCIM,
     RandLambda,
     RemoveRepeatedChannel,
     RepeatChannel,
@@ -405,6 +412,9 @@
     CopyItemsd,
     CopyItemsD,
     CopyItemsDict,
+    CuCIMd,
+    CuCIMD,
+    CuCIMDict,
     DataStatsd,
     DataStatsD,
     DataStatsDict,
@@ -435,6 +445,9 @@
     MapLabelValued,
     MapLabelValueD,
     MapLabelValueDict,
+    RandCuCIMd,
+    RandCuCIMD,
+    RandCuCIMDict,
     RandLambdad,
     RandLambdaD,
     RandLambdaDict,
@@ -486,6 +499,7 @@
     allow_missing_keys_mode,
     compute_divisible_spatial_size,
     convert_inverse_interp_mode,
+    convert_pad_mode,
     copypaste_arrays,
     create_control_grid,
     create_grid,
@@ -518,4 +532,16 @@
     weighted_patch_samples,
     zero_margins,
 )
-from .utils_pytorch_numpy_unification import in1d, moveaxis
+from .utils_pytorch_numpy_unification import (
+    any_np_pt,
+    clip,
+    floor_divide,
+    in1d,
+    maximum,
+    moveaxis,
+    nonzero,
+    percentile,
+    ravel,
+    unravel_index,
+    where,
+)
diff --git a/monai/transforms/compose.py b/monai/transforms/compose.py
index 4bf175769b..2405530ef3 100644
--- a/monai/transforms/compose.py
+++ b/monai/transforms/compose.py
@@ -181,7 +181,7 @@ class OneOf(Compose):
         weights: probabilities corresponding to each callable in transforms.
             Probabilities are normalized to sum to one.
 
-    OneOf inherits from Compose and uses args map_items and unpack_items in
+    ``OneOf`` inherits from ``Compose`` and uses args ``map_items`` and ``unpack_items`` in
     the same way.
     """
 
@@ -204,14 +204,13 @@ def __init__(
     def _normalize_probabilities(self, weights):
         if len(weights) == 0:
             return weights
-        else:
-            weights = np.array(weights)
-            if np.any(weights < 0):
-                raise AssertionError("Probabilities must be greater than or equal to zero.")
-            if np.all(weights == 0):
-                raise AssertionError("At least one probability must be greater than zero.")
-            weights = weights / weights.sum()
-            return list(weights)
+        weights = np.array(weights)
+        if np.any(weights < 0):
+            raise AssertionError("Probabilities must be greater than or equal to zero.")
+        if np.all(weights == 0):
+            raise AssertionError("At least one probability must be greater than zero.")
+        weights = weights / weights.sum()
+        return list(weights)
 
     def flatten(self):
         transforms = []
@@ -232,16 +231,15 @@ def flatten(self):
     def __call__(self, data):
         if len(self.transforms) == 0:
             return data
-        else:
-            index = self.R.multinomial(1, self.weights).argmax()
-            _transform = self.transforms[index]
-            data = apply_transform(_transform, data, self.map_items, self.unpack_items)
-            # if the data is a mapping (dictionary), append the OneOf transform to the end
-            if isinstance(data, Mapping):
-                for key in data.keys():
-                    if key + InverseKeys.KEY_SUFFIX in data:
-                        self.push_transform(data, key, extra_info={"index": index})
-            return data
+        index = self.R.multinomial(1, self.weights).argmax()
+        _transform = self.transforms[index]
+        data = apply_transform(_transform, data, self.map_items, self.unpack_items)
+        # if the data is a mapping (dictionary), append the OneOf transform to the end
+        if isinstance(data, Mapping):
+            for key in data.keys():
+                if key + InverseKeys.KEY_SUFFIX in data:
+                    self.push_transform(data, key, extra_info={"index": index})
+        return data
 
     def inverse(self, data):
         if len(self.transforms) == 0:
diff --git a/monai/transforms/croppad/array.py b/monai/transforms/croppad/array.py
index 74f556cc1a..c2cf03d6fe 100644
--- a/monai/transforms/croppad/array.py
+++ b/monai/transforms/croppad/array.py
@@ -22,11 +22,12 @@
 from torch.nn.functional import pad as pad_pt
 
 from monai.config import IndexSelection
-from monai.config.type_definitions import NdarrayTensor
+from monai.config.type_definitions import NdarrayOrTensor
 from monai.data.utils import get_random_patch, get_valid_patch_size
 from monai.transforms.transform import Randomizable, Transform
 from monai.transforms.utils import (
     compute_divisible_spatial_size,
+    convert_pad_mode,
     generate_label_classes_crop_centers,
     generate_pos_neg_label_crop_centers,
     generate_spatial_bounding_box,
@@ -35,7 +36,16 @@
     map_classes_to_indices,
     weighted_patch_samples,
 )
-from monai.utils import Method, NumpyPadMode, ensure_tuple, ensure_tuple_rep, fall_back_tuple, look_up_option
+from monai.transforms.utils_pytorch_numpy_unification import floor_divide, maximum
+from monai.utils import (
+    Method,
+    NumpyPadMode,
+    PytorchPadMode,
+    ensure_tuple,
+    ensure_tuple_rep,
+    fall_back_tuple,
+    look_up_option,
+)
 from monai.utils.enums import TransformBackends
 from monai.utils.type_conversion import convert_data_type
 
@@ -61,16 +71,18 @@
 class Pad(Transform):
     """
     Perform padding for a given an amount of padding in each dimension.
-    If input is `torch.Tensor` and mode is `constant`, `torch.nn.functional.pad` will be used.
-    Otherwise, `np.pad` will be used (input converted to `np.ndarray` if necessary).
-    Uses np.pad so in practice, a mode needs to be provided. See numpy.lib.arraypad.pad
-    for additional details.
+    If input is `torch.Tensor`, `torch.nn.functional.pad` will be used, otherwise, `np.pad` will be used.
+
     Args:
         to_pad: the amount to be padded in each dimension [(low_H, high_H), (low_W, high_W), ...].
-        mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``,
-            ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+        mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+            ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+            available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
             One of the listed string values or a user supplied function. Defaults to ``"constant"``.
             See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+            https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
+        kwargs: other arguments for the `np.pad` or `torch.pad` function.
+            note that `np.pad` treats channel dimension as the first dimension.
     """
 
     backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
@@ -78,43 +90,46 @@ class Pad(Transform):
     def __init__(
         self,
         to_pad: List[Tuple[int, int]],
-        mode: Union[NumpyPadMode, str, None] = NumpyPadMode.CONSTANT,
-        **np_kwargs,
+        mode: Union[NumpyPadMode, PytorchPadMode, str] = NumpyPadMode.CONSTANT,
+        **kwargs,
     ) -> None:
         self.to_pad = to_pad
-        self.mode = mode or NumpyPadMode.CONSTANT
-        self.np_kwargs = np_kwargs
+        self.mode = mode
+        self.kwargs = kwargs
 
     @staticmethod
-    def _np_pad(img: np.ndarray, all_pad_width, mode, **np_kwargs) -> np.ndarray:
-        img_np, *_ = convert_data_type(img, np.ndarray)
-        return np.pad(img_np, all_pad_width, mode=mode, **np_kwargs)  # type: ignore
+    def _np_pad(img: np.ndarray, all_pad_width, mode, **kwargs) -> np.ndarray:
+        return np.pad(img, all_pad_width, mode=mode, **kwargs)  # type: ignore
 
     @staticmethod
-    def _pt_pad(img: torch.Tensor, all_pad_width, mode, **np_kwargs) -> torch.Tensor:
-        pt_pad_width = [val for sublist in all_pad_width for val in sublist[::-1]][::-1]
-        return pad_pt(img, pt_pad_width, mode=mode, **np_kwargs)
+    def _pt_pad(img: torch.Tensor, all_pad_width, mode, **kwargs) -> torch.Tensor:
+        pt_pad_width = [val for sublist in all_pad_width[1:] for val in sublist[::-1]][::-1]
+        # torch.pad expects `[B, C, H, W, [D]]` shape
+        return pad_pt(img.unsqueeze(0), pt_pad_width, mode=mode, **kwargs).squeeze(0)
 
-    def __call__(self, img: NdarrayTensor, mode: Optional[Union[NumpyPadMode, str]] = None) -> NdarrayTensor:
+    def __call__(
+        self,
+        img: NdarrayOrTensor,
+        mode: Optional[Union[NumpyPadMode, PytorchPadMode, str]] = None,
+    ) -> NdarrayOrTensor:
         """
         Args:
             img: data to be transformed, assuming `img` is channel-first and
                 padding doesn't apply to the channel dim.
-            mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``,
-                ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
-                One of the listed string values or a user supplied function. Defaults to ``self.mode``.
-                See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+        mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+            ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+            available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"`` or ``"circular"``}.
+            One of the listed string values or a user supplied function. Defaults to `self.mode`.
+            See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+            https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
+
         """
         if not np.asarray(self.to_pad).any():
             # all zeros, skip padding
             return img
-        mode = mode or self.mode
-        mode = mode.value if isinstance(mode, NumpyPadMode) else mode
-        if isinstance(img, torch.Tensor) and mode == "constant" and not self.np_kwargs:
-            pad = self._pt_pad
-        else:
-            pad = self._np_pad  # type: ignore
-        return pad(img, self.to_pad, mode, **self.np_kwargs)
+        mode = convert_pad_mode(dst=img, mode=mode or self.mode).value
+        pad = self._pt_pad if isinstance(img, torch.Tensor) else self._np_pad
+        return pad(img, self.to_pad, mode, **self.kwargs)  # type: ignore
 
 
 class SpatialPad(Transform):
@@ -135,12 +150,14 @@ class SpatialPad(Transform):
             `spatial_size=[32, 25, -1]`, the spatial size of output data will be [32, 30, 30].
         method: {``"symmetric"``, ``"end"``}
             Pad image symmetrically on every side or only pad at the end sides. Defaults to ``"symmetric"``.
-        mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``,
-            ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+        mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+            ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+            available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
             One of the listed string values or a user supplied function. Defaults to ``"constant"``.
             See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
-        np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension.
-            more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+            https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
+        kwargs: other arguments for the `np.pad` or `torch.pad` function.
+            note that `np.pad` treats channel dimension as the first dimension.
 
     """
 
@@ -150,13 +167,13 @@ def __init__(
         self,
         spatial_size: Union[Sequence[int], int],
         method: Union[Method, str] = Method.SYMMETRIC,
-        mode: Union[NumpyPadMode, str] = NumpyPadMode.CONSTANT,
-        **np_kwargs,
+        mode: Union[NumpyPadMode, PytorchPadMode, str] = NumpyPadMode.CONSTANT,
+        **kwargs,
     ) -> None:
         self.spatial_size = spatial_size
         self.method: Method = look_up_option(method, Method)
-        self.mode: NumpyPadMode = look_up_option(mode, NumpyPadMode)
-        self.np_kwargs = np_kwargs
+        self.mode = mode
+        self.kwargs = kwargs
 
     def _determine_data_pad_width(self, data_shape: Sequence[int]) -> List[Tuple[int, int]]:
         spatial_size = fall_back_tuple(self.spatial_size, data_shape)
@@ -168,15 +185,22 @@ def _determine_data_pad_width(self, data_shape: Sequence[int]) -> List[Tuple[int
             return pad_width
         return [(0, max(sp_i - data_shape[i], 0)) for i, sp_i in enumerate(spatial_size)]
 
-    def __call__(self, img: NdarrayTensor, mode: Optional[Union[NumpyPadMode, str]] = None) -> NdarrayTensor:
+    def __call__(
+        self,
+        img: NdarrayOrTensor,
+        mode: Optional[Union[NumpyPadMode, PytorchPadMode, str]] = None,
+    ) -> NdarrayOrTensor:
         """
         Args:
             img: data to be transformed, assuming `img` is channel-first and
                 padding doesn't apply to the channel dim.
-            mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``,
-                ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
-                One of the listed string values or a user supplied function. Defaults to ``self.mode``.
+            mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+                ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+                available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
+                One of the listed string values or a user supplied function. Defaults to `self.mode`.
                 See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+                https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
+
         """
         data_pad_width = self._determine_data_pad_width(img.shape[1:])
         all_pad_width = [(0, 0)] + data_pad_width
@@ -184,8 +208,7 @@ def __call__(self, img: NdarrayTensor, mode: Optional[Union[NumpyPadMode, str]]
             # all zeros, skip padding
             return img
 
-        mode = look_up_option(mode or self.mode, NumpyPadMode)
-        padder = Pad(all_pad_width, mode, **self.np_kwargs)
+        padder = Pad(all_pad_width, mode or self.mode, **self.kwargs)
         return padder(img)
 
 
@@ -204,13 +227,14 @@ class BorderPad(Transform):
               for example, image shape(CHW) is [1, 4, 4], spatial_border is [1, 2, 3, 4], pad top of H dim with 1,
               pad bottom of H dim with 2, pad left of W dim with 3, pad right of W dim with 4.
               the result shape is [1, 7, 11].
-
-        mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``,
-            ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+        mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+            ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+            available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
             One of the listed string values or a user supplied function. Defaults to ``"constant"``.
             See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
-        np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension.
-            more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+            https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
+        kwargs: other arguments for the `np.pad` or `torch.pad` function.
+            note that `np.pad` treats channel dimension as the first dimension.
 
     """
 
@@ -219,22 +243,28 @@ class BorderPad(Transform):
     def __init__(
         self,
         spatial_border: Union[Sequence[int], int],
-        mode: Union[NumpyPadMode, str] = NumpyPadMode.CONSTANT,
-        **np_kwargs,
+        mode: Union[NumpyPadMode, PytorchPadMode, str] = NumpyPadMode.CONSTANT,
+        **kwargs,
     ) -> None:
         self.spatial_border = spatial_border
-        self.mode: NumpyPadMode = look_up_option(mode, NumpyPadMode)
-        self.np_kwargs = np_kwargs
+        self.mode = mode
+        self.kwargs = kwargs
 
-    def __call__(self, img: NdarrayTensor, mode: Optional[Union[NumpyPadMode, str]] = None) -> NdarrayTensor:
+    def __call__(
+        self,
+        img: NdarrayOrTensor,
+        mode: Optional[Union[NumpyPadMode, PytorchPadMode, str]] = None,
+    ) -> NdarrayOrTensor:
         """
         Args:
             img: data to be transformed, assuming `img` is channel-first and
                 padding doesn't apply to the channel dim.
-            mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``,
-                ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
-                One of the listed string values or a user supplied function. Defaults to ``self.mode``.
+            mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+                ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+                available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
+                One of the listed string values or a user supplied function. Defaults to `self.mode`.
                 See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+                https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
 
         Raises:
             ValueError: When ``self.spatial_border`` does not contain ints.
@@ -261,8 +291,7 @@ def __call__(self, img: NdarrayTensor, mode: Optional[Union[NumpyPadMode, str]]
             )
 
         all_pad_width = [(0, 0)] + data_pad_width
-        mode = look_up_option(mode or self.mode, NumpyPadMode)
-        padder = Pad(all_pad_width, mode, **self.np_kwargs)
+        padder = Pad(all_pad_width, mode or self.mode, **self.kwargs)
         return padder(img)
 
 
@@ -276,47 +305,56 @@ class DivisiblePad(Transform):
     def __init__(
         self,
         k: Union[Sequence[int], int],
-        mode: Union[NumpyPadMode, str] = NumpyPadMode.CONSTANT,
+        mode: Union[NumpyPadMode, PytorchPadMode, str] = NumpyPadMode.CONSTANT,
         method: Union[Method, str] = Method.SYMMETRIC,
-        **np_kwargs,
+        **kwargs,
     ) -> None:
         """
         Args:
             k: the target k for each spatial dimension.
                 if `k` is negative or 0, the original size is preserved.
                 if `k` is an int, the same `k` be applied to all the input spatial dimensions.
-            mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``,
-                ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+            mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+                ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+                available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
                 One of the listed string values or a user supplied function. Defaults to ``"constant"``.
                 See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+                https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
             method: {``"symmetric"``, ``"end"``}
                 Pad image symmetrically on every side or only pad at the end sides. Defaults to ``"symmetric"``.
-            np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension.
-                more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+            kwargs: other arguments for the `np.pad` or `torch.pad` function.
+                note that `np.pad` treats channel dimension as the first dimension.
 
         See also :py:class:`monai.transforms.SpatialPad`
         """
         self.k = k
         self.mode: NumpyPadMode = NumpyPadMode(mode)
         self.method: Method = Method(method)
-        self.np_kwargs = np_kwargs
+        self.kwargs = kwargs
 
-    def __call__(self, img: NdarrayTensor, mode: Optional[Union[NumpyPadMode, str]] = None) -> NdarrayTensor:
+    def __call__(
+        self,
+        img: NdarrayOrTensor,
+        mode: Optional[Union[NumpyPadMode, PytorchPadMode, str]] = None,
+    ) -> NdarrayOrTensor:
         """
         Args:
             img: data to be transformed, assuming `img` is channel-first
                 and padding doesn't apply to the channel dim.
-            mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``,
-                ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
-                One of the listed string values or a user supplied function. Defaults to ``self.mode``.
+            mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+                ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+                available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
+                One of the listed string values or a user supplied function. Defaults to `self.mode`.
                 See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+                https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
+
         """
         new_size = compute_divisible_spatial_size(spatial_shape=img.shape[1:], k=self.k)
         spatial_pad = SpatialPad(
             spatial_size=new_size,
             method=self.method,
             mode=mode or self.mode,
-            **self.np_kwargs,
+            **self.kwargs,
         )
 
         return spatial_pad(img)
@@ -336,12 +374,14 @@ class SpatialCrop(Transform):
         - the start and end coordinates of the ROI
     """
 
+    backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
+
     def __init__(
         self,
-        roi_center: Union[Sequence[int], np.ndarray, None] = None,
-        roi_size: Union[Sequence[int], np.ndarray, None] = None,
-        roi_start: Union[Sequence[int], np.ndarray, None] = None,
-        roi_end: Union[Sequence[int], np.ndarray, None] = None,
+        roi_center: Union[Sequence[int], NdarrayOrTensor, None] = None,
+        roi_size: Union[Sequence[int], NdarrayOrTensor, None] = None,
+        roi_start: Union[Sequence[int], NdarrayOrTensor, None] = None,
+        roi_end: Union[Sequence[int], NdarrayOrTensor, None] = None,
         roi_slices: Optional[Sequence[slice]] = None,
     ) -> None:
         """
@@ -354,28 +394,34 @@ def __init__(
                 use the end coordinate of image.
             roi_slices: list of slices for each of the spatial dimensions.
         """
+        roi_start_torch: torch.Tensor
+
         if roi_slices:
             if not all(s.step is None or s.step == 1 for s in roi_slices):
                 raise ValueError("Only slice steps of 1/None are currently supported")
             self.slices = list(roi_slices)
         else:
             if roi_center is not None and roi_size is not None:
-                roi_center = np.asarray(roi_center, dtype=np.int16)
-                roi_size = np.asarray(roi_size, dtype=np.int16)
-                roi_start_np = np.maximum(roi_center - np.floor_divide(roi_size, 2), 0)
-                roi_end_np = np.maximum(roi_start_np + roi_size, roi_start_np)
+                roi_center = torch.as_tensor(roi_center, dtype=torch.int16)
+                roi_size = torch.as_tensor(roi_size, dtype=torch.int16, device=roi_center.device)
+                roi_start_torch = maximum(  # type: ignore
+                    roi_center - floor_divide(roi_size, 2),
+                    torch.zeros_like(roi_center),
+                )
+                roi_end_torch = maximum(roi_start_torch + roi_size, roi_start_torch)
             else:
                 if roi_start is None or roi_end is None:
                     raise ValueError("Please specify either roi_center, roi_size or roi_start, roi_end.")
-                roi_start_np = np.maximum(np.asarray(roi_start, dtype=np.int16), 0)
-                roi_end_np = np.maximum(np.asarray(roi_end, dtype=np.int16), roi_start_np)
-            # Allow for 1D by converting back to np.array (since np.maximum will convert to int)
-            roi_start_np = roi_start_np if isinstance(roi_start_np, np.ndarray) else np.array([roi_start_np])
-            roi_end_np = roi_end_np if isinstance(roi_end_np, np.ndarray) else np.array([roi_end_np])
-            # convert to slices
-            self.slices = [slice(s, e) for s, e in zip(roi_start_np, roi_end_np)]
-
-    def __call__(self, img: Union[np.ndarray, torch.Tensor]):
+                roi_start_torch = torch.as_tensor(roi_start, dtype=torch.int16)
+                roi_start_torch = maximum(roi_start_torch, torch.zeros_like(roi_start_torch))  # type: ignore
+                roi_end_torch = maximum(torch.as_tensor(roi_end, dtype=torch.int16), roi_start_torch)
+            # convert to slices (accounting for 1d)
+            if roi_start_torch.numel() == 1:
+                self.slices = [slice(int(roi_start_torch.item()), int(roi_end_torch.item()))]
+            else:
+                self.slices = [slice(int(s), int(e)) for s, e in zip(roi_start_torch.tolist(), roi_end_torch.tolist())]
+
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         Apply the transform to `img`, assuming `img` is channel-first and
         slicing doesn't apply to the channel dim.
@@ -400,10 +446,12 @@ class CenterSpatialCrop(Transform):
             the spatial size of output data will be [32, 40, 40].
     """
 
+    backend = SpatialCrop.backend
+
     def __init__(self, roi_size: Union[Sequence[int], int]) -> None:
         self.roi_size = roi_size
 
-    def __call__(self, img: np.ndarray):
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         Apply the transform to `img`, assuming `img` is channel-first and
         slicing doesn't apply to the channel dim.
@@ -424,10 +472,12 @@ class CenterScaleCrop(Transform):
 
     """
 
+    backend = CenterSpatialCrop.backend
+
     def __init__(self, roi_scale: Union[Sequence[float], float]):
         self.roi_scale = roi_scale
 
-    def __call__(self, img: np.ndarray):
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         img_size = img.shape[1:]
         ndim = len(img_size)
         roi_size = [ceil(r * s) for r, s in zip(ensure_tuple_rep(self.roi_scale, ndim), img_size)]
@@ -459,6 +509,8 @@ class RandSpatialCrop(Randomizable, Transform):
             if True, the actual size is sampled from `randint(roi_size, max_roi_size + 1)`.
     """
 
+    backend = CenterSpatialCrop.backend
+
     def __init__(
         self,
         roi_size: Union[Sequence[int], int],
@@ -479,19 +531,19 @@ def randomize(self, img_size: Sequence[int]) -> None:
             max_size = img_size if self.max_roi_size is None else fall_back_tuple(self.max_roi_size, img_size)
             if any(i > j for i, j in zip(self._size, max_size)):
                 raise ValueError(f"min ROI size: {self._size} is bigger than max ROI size: {max_size}.")
-            self._size = tuple((self.R.randint(low=self._size[i], high=max_size[i] + 1) for i in range(len(img_size))))
+            self._size = tuple(self.R.randint(low=self._size[i], high=max_size[i] + 1) for i in range(len(img_size)))
         if self.random_center:
             valid_size = get_valid_patch_size(img_size, self._size)
             self._slices = (slice(None),) + get_random_patch(img_size, valid_size, self.R)
 
-    def __call__(self, img: np.ndarray):
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         Apply the transform to `img`, assuming `img` is channel-first and
         slicing doesn't apply to the channel dim.
         """
         self.randomize(img.shape[1:])
         if self._size is None:
-            raise AssertionError
+            raise RuntimeError("self._size not specified.")
         if self.random_center:
             return img[self._slices]
         cropper = CenterSpatialCrop(self._size)
@@ -530,7 +582,7 @@ def __init__(
         self.roi_scale = roi_scale
         self.max_roi_scale = max_roi_scale
 
-    def __call__(self, img: np.ndarray):
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         Apply the transform to `img`, assuming `img` is channel-first and
         slicing doesn't apply to the channel dim.
@@ -576,6 +628,8 @@ class RandSpatialCropSamples(Randomizable, Transform):
 
     """
 
+    backend = RandScaleCrop.backend
+
     def __init__(
         self,
         roi_size: Union[Sequence[int], int],
@@ -599,7 +653,7 @@ def set_random_state(
     def randomize(self, data: Optional[Any] = None) -> None:
         pass
 
-    def __call__(self, img: np.ndarray) -> List[np.ndarray]:
+    def __call__(self, img: NdarrayOrTensor) -> List[NdarrayOrTensor]:
         """
         Apply the transform to `img`, assuming `img` is channel-first and
         cropping doesn't change the channel dim.
@@ -639,6 +693,8 @@ def threshold_at_one(x):
 
     """
 
+    backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
+
     def __init__(
         self,
         select_fn: Callable = is_positive,
@@ -674,13 +730,15 @@ def __init__(
         self.mode: NumpyPadMode = look_up_option(mode, NumpyPadMode)
         self.np_kwargs = np_kwargs
 
-    def compute_bounding_box(self, img: np.ndarray):
+    def compute_bounding_box(self, img: NdarrayOrTensor) -> Tuple[np.ndarray, np.ndarray]:
         """
         Compute the start points and end points of bounding box to crop.
         And adjust bounding box coords to be divisible by `k`.
 
         """
         box_start, box_end = generate_spatial_bounding_box(img, self.select_fn, self.channel_indices, self.margin)
+        box_start = [i.cpu() if isinstance(i, torch.Tensor) else i for i in box_start]  # type: ignore
+        box_end = [i.cpu() if isinstance(i, torch.Tensor) else i for i in box_end]  # type: ignore
         box_start_ = np.asarray(box_start, dtype=np.int16)
         box_end_ = np.asarray(box_end, dtype=np.int16)
         orig_spatial_size = box_end_ - box_start_
@@ -693,7 +751,7 @@ def compute_bounding_box(self, img: np.ndarray):
 
     def crop_pad(
         self,
-        img: np.ndarray,
+        img: NdarrayOrTensor,
         box_start: np.ndarray,
         box_end: np.ndarray,
         mode: Optional[Union[NumpyPadMode, str]] = None,
@@ -708,7 +766,7 @@ def crop_pad(
         pad = list(chain(*zip(pad_to_start.tolist(), pad_to_end.tolist())))
         return BorderPad(spatial_border=pad, mode=mode or self.mode, **self.np_kwargs)(cropped)
 
-    def __call__(self, img: np.ndarray, mode: Optional[Union[NumpyPadMode, str]] = None):
+    def __call__(self, img: NdarrayOrTensor, mode: Optional[Union[NumpyPadMode, str]] = None):
         """
         Apply the transform to `img`, assuming `img` is channel-first and
         slicing doesn't change the channel dim.
@@ -758,18 +816,23 @@ def __call__(self, img: np.ndarray, weight_map: Optional[np.ndarray] = None) ->
         Returns:
             A list of image patches
         """
+        img, *_ = convert_data_type(img, np.ndarray)  # type: ignore
         if weight_map is None:
             weight_map = self.weight_map
         if weight_map is None:
             raise ValueError("weight map must be provided for weighted patch sampling.")
         if img.shape[1:] != weight_map.shape[1:]:
             raise ValueError(f"image and weight map spatial shape mismatch: {img.shape[1:]} vs {weight_map.shape[1:]}.")
+
+        weight_map, *_ = convert_data_type(weight_map, np.ndarray)  # type: ignore
+
         self.randomize(weight_map)
         _spatial_size = fall_back_tuple(self.spatial_size, weight_map.shape[1:])
         results = []
         for center in self.centers:
             cropper = SpatialCrop(roi_center=center, roi_size=_spatial_size)
-            results.append(cropper(img))
+            cropped: np.ndarray = cropper(img)  # type: ignore
+            results.append(cropped)
         return results
 
 
@@ -823,17 +886,19 @@ class RandCropByPosNegLabel(Randomizable, Transform):
 
     """
 
+    backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
+
     def __init__(
         self,
         spatial_size: Union[Sequence[int], int],
-        label: Optional[np.ndarray] = None,
+        label: Optional[NdarrayOrTensor] = None,
         pos: float = 1.0,
         neg: float = 1.0,
         num_samples: int = 1,
-        image: Optional[np.ndarray] = None,
+        image: Optional[NdarrayOrTensor] = None,
         image_threshold: float = 0.0,
-        fg_indices: Optional[np.ndarray] = None,
-        bg_indices: Optional[np.ndarray] = None,
+        fg_indices: Optional[NdarrayOrTensor] = None,
+        bg_indices: Optional[NdarrayOrTensor] = None,
     ) -> None:
         self.spatial_size = ensure_tuple(spatial_size)
         self.label = label
@@ -845,16 +910,16 @@ def __init__(
         self.num_samples = num_samples
         self.image = image
         self.image_threshold = image_threshold
-        self.centers: Optional[List[List[np.ndarray]]] = None
+        self.centers: Optional[List[List[int]]] = None
         self.fg_indices = fg_indices
         self.bg_indices = bg_indices
 
     def randomize(
         self,
-        label: np.ndarray,
-        fg_indices: Optional[np.ndarray] = None,
-        bg_indices: Optional[np.ndarray] = None,
-        image: Optional[np.ndarray] = None,
+        label: NdarrayOrTensor,
+        fg_indices: Optional[NdarrayOrTensor] = None,
+        bg_indices: Optional[NdarrayOrTensor] = None,
+        image: Optional[NdarrayOrTensor] = None,
     ) -> None:
         self.spatial_size = fall_back_tuple(self.spatial_size, default=label.shape[1:])
         if fg_indices is None or bg_indices is None:
@@ -872,12 +937,12 @@ def randomize(
 
     def __call__(
         self,
-        img: np.ndarray,
-        label: Optional[np.ndarray] = None,
-        image: Optional[np.ndarray] = None,
-        fg_indices: Optional[np.ndarray] = None,
-        bg_indices: Optional[np.ndarray] = None,
-    ) -> List[np.ndarray]:
+        img: NdarrayOrTensor,
+        label: Optional[NdarrayOrTensor] = None,
+        image: Optional[NdarrayOrTensor] = None,
+        fg_indices: Optional[NdarrayOrTensor] = None,
+        bg_indices: Optional[NdarrayOrTensor] = None,
+    ) -> List[NdarrayOrTensor]:
         """
         Args:
             img: input data to crop samples from based on the pos/neg ratio of `label` and `image`.
@@ -900,10 +965,10 @@ def __call__(
             image = self.image
 
         self.randomize(label, fg_indices, bg_indices, image)
-        results: List[np.ndarray] = []
+        results: List[NdarrayOrTensor] = []
         if self.centers is not None:
             for center in self.centers:
-                cropper = SpatialCrop(roi_center=tuple(center), roi_size=self.spatial_size)  # type: ignore
+                cropper = SpatialCrop(roi_center=center, roi_size=self.spatial_size)
                 results.append(cropper(img))
 
         return results
@@ -968,16 +1033,18 @@ class RandCropByLabelClasses(Randomizable, Transform):
 
     """
 
+    backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
+
     def __init__(
         self,
         spatial_size: Union[Sequence[int], int],
         ratios: Optional[List[Union[float, int]]] = None,
-        label: Optional[np.ndarray] = None,
+        label: Optional[NdarrayOrTensor] = None,
         num_classes: Optional[int] = None,
         num_samples: int = 1,
-        image: Optional[np.ndarray] = None,
+        image: Optional[NdarrayOrTensor] = None,
         image_threshold: float = 0.0,
-        indices: Optional[List[np.ndarray]] = None,
+        indices: Optional[List[NdarrayOrTensor]] = None,
     ) -> None:
         self.spatial_size = ensure_tuple(spatial_size)
         self.ratios = ratios
@@ -986,17 +1053,17 @@ def __init__(
         self.num_samples = num_samples
         self.image = image
         self.image_threshold = image_threshold
-        self.centers: Optional[List[List[np.ndarray]]] = None
+        self.centers: Optional[List[List[int]]] = None
         self.indices = indices
 
     def randomize(
         self,
-        label: np.ndarray,
-        indices: Optional[List[np.ndarray]] = None,
-        image: Optional[np.ndarray] = None,
+        label: NdarrayOrTensor,
+        indices: Optional[List[NdarrayOrTensor]] = None,
+        image: Optional[NdarrayOrTensor] = None,
     ) -> None:
         self.spatial_size = fall_back_tuple(self.spatial_size, default=label.shape[1:])
-        indices_: List[np.ndarray]
+        indices_: Sequence[NdarrayOrTensor]
         if indices is None:
             if self.indices is not None:
                 indices_ = self.indices
@@ -1010,11 +1077,11 @@ def randomize(
 
     def __call__(
         self,
-        img: np.ndarray,
-        label: Optional[np.ndarray] = None,
-        image: Optional[np.ndarray] = None,
-        indices: Optional[List[np.ndarray]] = None,
-    ) -> List[np.ndarray]:
+        img: NdarrayOrTensor,
+        label: Optional[NdarrayOrTensor] = None,
+        image: Optional[NdarrayOrTensor] = None,
+        indices: Optional[List[NdarrayOrTensor]] = None,
+    ) -> List[NdarrayOrTensor]:
         """
         Args:
             img: input data to crop samples from based on the ratios of every class, assumes `img` is a
@@ -1033,10 +1100,10 @@ def __call__(
             image = self.image
 
         self.randomize(label, indices, image)
-        results: List[np.ndarray] = []
+        results: List[NdarrayOrTensor] = []
         if self.centers is not None:
             for center in self.centers:
-                cropper = SpatialCrop(roi_center=tuple(center), roi_size=self.spatial_size)  # type: ignore
+                cropper = SpatialCrop(roi_center=tuple(center), roi_size=self.spatial_size)
                 results.append(cropper(img))
 
         return results
@@ -1063,6 +1130,8 @@ class ResizeWithPadOrCrop(Transform):
 
     """
 
+    backend = list(set(SpatialPad.backend) & set(CenterSpatialCrop.backend))
+
     def __init__(
         self,
         spatial_size: Union[Sequence[int], int],
@@ -1073,7 +1142,7 @@ def __init__(
         self.padder = SpatialPad(spatial_size=spatial_size, method=method, mode=mode, **np_kwargs)
         self.cropper = CenterSpatialCrop(roi_size=spatial_size)
 
-    def __call__(self, img: np.ndarray, mode: Optional[Union[NumpyPadMode, str]] = None) -> np.ndarray:
+    def __call__(self, img: NdarrayOrTensor, mode: Optional[Union[NumpyPadMode, str]] = None) -> NdarrayOrTensor:
         """
         Args:
             img: data to pad or crop, assuming `img` is channel-first and
@@ -1084,7 +1153,7 @@ def __call__(self, img: np.ndarray, mode: Optional[Union[NumpyPadMode, str]] = N
                 If None, defaults to the ``mode`` in construction.
                 See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
         """
-        return self.padder(self.cropper(img), mode=mode)
+        return self.padder(self.cropper(img), mode=mode)  # type: ignore
 
 
 class BoundingRect(Transform):
@@ -1118,6 +1187,7 @@ def __call__(self, img: np.ndarray) -> np.ndarray:
         """
         See also: :py:class:`monai.transforms.utils.generate_spatial_bounding_box`.
         """
+        img, *_ = convert_data_type(img, np.ndarray)  # type: ignore
         bbox = []
 
         for channel in range(img.shape[0]):
diff --git a/monai/transforms/croppad/dictionary.py b/monai/transforms/croppad/dictionary.py
index 9e33ab2db1..222d6ae17c 100644
--- a/monai/transforms/croppad/dictionary.py
+++ b/monai/transforms/croppad/dictionary.py
@@ -25,7 +25,7 @@
 import numpy as np
 
 from monai.config import IndexSelection, KeysCollection
-from monai.config.type_definitions import NdarrayTensor
+from monai.config.type_definitions import NdarrayOrTensor
 from monai.data.utils import get_random_patch, get_valid_patch_size
 from monai.transforms.croppad.array import (
     BorderPad,
@@ -33,6 +33,8 @@
     CenterSpatialCrop,
     CropForeground,
     DivisiblePad,
+    RandCropByLabelClasses,
+    RandCropByPosNegLabel,
     ResizeWithPadOrCrop,
     SpatialCrop,
     SpatialPad,
@@ -49,11 +51,12 @@
     weighted_patch_samples,
 )
 from monai.utils import ImageMetaKey as Key
-from monai.utils import Method, NumpyPadMode, ensure_tuple, ensure_tuple_rep, fall_back_tuple
+from monai.utils import Method, NumpyPadMode, PytorchPadMode, ensure_tuple, ensure_tuple_rep, fall_back_tuple
 from monai.utils.enums import InverseKeys
+from monai.utils.type_conversion import convert_data_type
 
 __all__ = [
-    "NumpyPadModeSequence",
+    "PadModeSequence",
     "SpatialPadd",
     "BorderPadd",
     "DivisiblePadd",
@@ -99,6 +102,7 @@
 ]
 
 NumpyPadModeSequence = Union[Sequence[Union[NumpyPadMode, str]], NumpyPadMode, str]
+PadModeSequence = Union[Sequence[Union[NumpyPadMode, PytorchPadMode, str]], NumpyPadMode, PytorchPadMode, str]
 
 
 class SpatialPadd(MapTransform, InvertibleTransform):
@@ -114,9 +118,9 @@ def __init__(
         keys: KeysCollection,
         spatial_size: Union[Sequence[int], int],
         method: Union[Method, str] = Method.SYMMETRIC,
-        mode: NumpyPadModeSequence = NumpyPadMode.CONSTANT,
+        mode: PadModeSequence = NumpyPadMode.CONSTANT,
         allow_missing_keys: bool = False,
-        **np_kwargs,
+        **kwargs,
     ) -> None:
         """
         Args:
@@ -129,28 +133,30 @@ def __init__(
                 the spatial size of output data will be [32, 30, 30].
             method: {``"symmetric"``, ``"end"``}
                 Pad image symmetrically on every side or only pad at the end sides. Defaults to ``"symmetric"``.
-            mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``,
-                ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+            mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+                ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+                available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
                 One of the listed string values or a user supplied function. Defaults to ``"constant"``.
                 See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+                https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
                 It also can be a sequence of string, each element corresponds to a key in ``keys``.
             allow_missing_keys: don't raise exception if key is missing.
-            np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension.
-                more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+            kwargs: other arguments for the `np.pad` or `torch.pad` function.
+                note that `np.pad` treats channel dimension as the first dimension.
 
         """
         super().__init__(keys, allow_missing_keys)
         self.mode = ensure_tuple_rep(mode, len(self.keys))
-        self.padder = SpatialPad(spatial_size, method, **np_kwargs)
+        self.padder = SpatialPad(spatial_size, method, **kwargs)
 
-    def __call__(self, data: Mapping[Hashable, NdarrayTensor]) -> Dict[Hashable, NdarrayTensor]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key, m in self.key_iterator(d, self.mode):
             self.push_transform(d, key, extra_info={"mode": m.value if isinstance(m, Enum) else m})
             d[key] = self.padder(d[key], mode=m)
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
         for key in self.key_iterator(d):
             transform = self.get_most_recent_transform(d, key)
@@ -183,9 +189,9 @@ def __init__(
         self,
         keys: KeysCollection,
         spatial_border: Union[Sequence[int], int],
-        mode: NumpyPadModeSequence = NumpyPadMode.CONSTANT,
+        mode: PadModeSequence = NumpyPadMode.CONSTANT,
         allow_missing_keys: bool = False,
-        **np_kwargs,
+        **kwargs,
     ) -> None:
         """
         Args:
@@ -202,28 +208,30 @@ def __init__(
                   pad bottom of H dim with 2, pad left of W dim with 3, pad right of W dim with 4.
                   the result shape is [1, 7, 11].
 
-            mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``,
-                ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+            mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+                ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+                available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
                 One of the listed string values or a user supplied function. Defaults to ``"constant"``.
                 See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+                https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
                 It also can be a sequence of string, each element corresponds to a key in ``keys``.
             allow_missing_keys: don't raise exception if key is missing.
-            np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension.
-                more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+            kwargs: other arguments for the `np.pad` or `torch.pad` function.
+                note that `np.pad` treats channel dimension as the first dimension.
 
         """
         super().__init__(keys, allow_missing_keys)
         self.mode = ensure_tuple_rep(mode, len(self.keys))
-        self.padder = BorderPad(spatial_border=spatial_border, **np_kwargs)
+        self.padder = BorderPad(spatial_border=spatial_border, **kwargs)
 
-    def __call__(self, data: Mapping[Hashable, NdarrayTensor]) -> Dict[Hashable, NdarrayTensor]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key, m in self.key_iterator(d, self.mode):
             self.push_transform(d, key, extra_info={"mode": m.value if isinstance(m, Enum) else m})
             d[key] = self.padder(d[key], mode=m)
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
 
         for key in self.key_iterator(d):
@@ -260,10 +268,10 @@ def __init__(
         self,
         keys: KeysCollection,
         k: Union[Sequence[int], int],
-        mode: NumpyPadModeSequence = NumpyPadMode.CONSTANT,
+        mode: PadModeSequence = NumpyPadMode.CONSTANT,
         method: Union[Method, str] = Method.SYMMETRIC,
         allow_missing_keys: bool = False,
-        **np_kwargs,
+        **kwargs,
     ) -> None:
         """
         Args:
@@ -272,32 +280,34 @@ def __init__(
             k: the target k for each spatial dimension.
                 if `k` is negative or 0, the original size is preserved.
                 if `k` is an int, the same `k` be applied to all the input spatial dimensions.
-            mode: {``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``, ``"mean"``,
-                ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+            mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+                ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+                available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
                 One of the listed string values or a user supplied function. Defaults to ``"constant"``.
                 See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+                https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
                 It also can be a sequence of string, each element corresponds to a key in ``keys``.
             method: {``"symmetric"``, ``"end"``}
                 Pad image symmetrically on every side or only pad at the end sides. Defaults to ``"symmetric"``.
             allow_missing_keys: don't raise exception if key is missing.
-            np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension.
-                more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+            kwargs: other arguments for the `np.pad` or `torch.pad` function.
+                note that `np.pad` treats channel dimension as the first dimension.
 
         See also :py:class:`monai.transforms.SpatialPad`
 
         """
         super().__init__(keys, allow_missing_keys)
         self.mode = ensure_tuple_rep(mode, len(self.keys))
-        self.padder = DivisiblePad(k=k, method=method, **np_kwargs)
+        self.padder = DivisiblePad(k=k, method=method, **kwargs)
 
-    def __call__(self, data: Mapping[Hashable, NdarrayTensor]) -> Dict[Hashable, NdarrayTensor]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key, m in self.key_iterator(d, self.mode):
             self.push_transform(d, key, extra_info={"mode": m.value if isinstance(m, Enum) else m})
             d[key] = self.padder(d[key], mode=m)
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
 
         for key in self.key_iterator(d):
@@ -331,6 +341,8 @@ class SpatialCropd(MapTransform, InvertibleTransform):
         - the start and end coordinates of the ROI
     """
 
+    backend = SpatialCrop.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -357,14 +369,14 @@ def __init__(
         super().__init__(keys, allow_missing_keys)
         self.cropper = SpatialCrop(roi_center, roi_size, roi_start, roi_end, roi_slices)
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key in self.key_iterator(d):
             self.push_transform(d, key)
             d[key] = self.cropper(d[key])
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
 
         for key in self.key_iterator(d):
@@ -404,13 +416,15 @@ class CenterSpatialCropd(MapTransform, InvertibleTransform):
         allow_missing_keys: don't raise exception if key is missing.
     """
 
+    backend = CenterSpatialCrop.backend
+
     def __init__(
         self, keys: KeysCollection, roi_size: Union[Sequence[int], int], allow_missing_keys: bool = False
     ) -> None:
         super().__init__(keys, allow_missing_keys)
         self.cropper = CenterSpatialCrop(roi_size)
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key in self.key_iterator(d):
             orig_size = d[key].shape[1:]
@@ -418,7 +432,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda
             self.push_transform(d, key, orig_size=orig_size)
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
 
         for key in self.key_iterator(d):
@@ -454,13 +468,15 @@ class CenterScaleCropd(MapTransform, InvertibleTransform):
         allow_missing_keys: don't raise exception if key is missing.
     """
 
+    backend = CenterSpatialCrop.backend
+
     def __init__(
         self, keys: KeysCollection, roi_scale: Union[Sequence[float], float], allow_missing_keys: bool = False
     ) -> None:
         super().__init__(keys, allow_missing_keys=allow_missing_keys)
         self.roi_scale = roi_scale
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         # use the spatial size of first image to scale, expect all images have the same spatial size
         img_size = data[self.keys[0]].shape[1:]
@@ -473,7 +489,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda
 
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
 
         for key in self.key_iterator(d):
@@ -525,6 +541,8 @@ class RandSpatialCropd(Randomizable, MapTransform, InvertibleTransform):
         allow_missing_keys: don't raise exception if key is missing.
     """
 
+    backend = CenterSpatialCrop.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -553,11 +571,11 @@ def randomize(self, img_size: Sequence[int]) -> None:
             valid_size = get_valid_patch_size(img_size, self._size)
             self._slices = (slice(None),) + get_random_patch(img_size, valid_size, self.R)
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         self.randomize(d[self.keys[0]].shape[1:])  # image shape from the first data key
         if self._size is None:
-            raise AssertionError
+            raise RuntimeError("self._size not specified.")
         for key in self.key_iterator(d):
             if self.random_center:
                 self.push_transform(d, key, {"slices": [(i.start, i.stop) for i in self._slices[1:]]})  # type: ignore
@@ -568,7 +586,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda
                 d[key] = cropper(d[key])
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
 
         for key in self.key_iterator(d):
@@ -626,6 +644,8 @@ class RandScaleCropd(RandSpatialCropd):
         allow_missing_keys: don't raise exception if key is missing.
     """
 
+    backend = RandSpatialCropd.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -643,11 +663,10 @@ def __init__(
             random_size=random_size,
             allow_missing_keys=allow_missing_keys,
         )
-        MapTransform.__init__(self, keys, allow_missing_keys)
         self.roi_scale = roi_scale
         self.max_roi_scale = max_roi_scale
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         img_size = data[self.keys[0]].shape[1:]
         ndim = len(img_size)
         self.roi_size = [ceil(r * s) for r, s in zip(ensure_tuple_rep(self.roi_scale, ndim), img_size)]
@@ -711,6 +730,8 @@ class RandSpatialCropSamplesd(Randomizable, MapTransform, InvertibleTransform):
 
     """
 
+    backend = RandSpatialCropd.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -743,7 +764,7 @@ def set_random_state(
     def randomize(self, data: Optional[Any] = None) -> None:
         pass
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, np.ndarray]]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> List[Dict[Hashable, NdarrayOrTensor]]:
         ret = []
         for i in range(self.num_samples):
             d = dict(data)
@@ -753,18 +774,18 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, n
             cropped = self.cropper(d)
             # self.cropper will have added RandSpatialCropd to the list. Change to RandSpatialCropSamplesd
             for key in self.key_iterator(cropped):
-                cropped[str(key) + InverseKeys.KEY_SUFFIX][-1][InverseKeys.CLASS_NAME] = self.__class__.__name__
-                cropped[str(key) + InverseKeys.KEY_SUFFIX][-1][InverseKeys.ID] = id(self)
+                cropped[str(key) + InverseKeys.KEY_SUFFIX][-1][InverseKeys.CLASS_NAME] = self.__class__.__name__  # type: ignore
+                cropped[str(key) + InverseKeys.KEY_SUFFIX][-1][InverseKeys.ID] = id(self)  # type: ignore
             # add `patch_index` to the meta data
             for key, meta_key, meta_key_postfix in self.key_iterator(d, self.meta_keys, self.meta_key_postfix):
                 meta_key = meta_key or f"{key}_{meta_key_postfix}"
                 if meta_key not in cropped:
                     cropped[meta_key] = {}  # type: ignore
-                cropped[meta_key][Key.PATCH_INDEX] = i
+                cropped[meta_key][Key.PATCH_INDEX] = i  # type: ignore
             ret.append(cropped)
         return ret
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, Any]) -> Dict[Hashable, Any]:
         d = deepcopy(dict(data))
         # We changed the transform name from RandSpatialCropd to RandSpatialCropSamplesd
         # Need to revert that since we're calling RandSpatialCropd's inverse
@@ -841,7 +862,9 @@ def __init__(
 
     def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
         d = dict(data)
-        box_start, box_end = self.cropper.compute_bounding_box(img=d[self.source_key])
+        img: np.ndarray
+        img, *_ = convert_data_type(d[self.source_key], np.ndarray)  # type: ignore
+        box_start, box_end = self.cropper.compute_bounding_box(img=img)
         d[self.start_coord_key] = box_start
         d[self.end_coord_key] = box_end
         for key, m in self.key_iterator(d, self.mode):
@@ -849,7 +872,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda
             d[key] = self.cropper.crop_pad(img=d[key], box_start=box_start, box_end=box_end, mode=m)
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
         for key in self.key_iterator(d):
             transform = self.get_most_recent_transform(d, key)
@@ -954,7 +977,8 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, n
             for i, center in enumerate(self.centers):
                 cropper = SpatialCrop(roi_center=center, roi_size=_spatial_size)
                 orig_size = img.shape[1:]
-                results[i][key] = cropper(img)
+                cropped: np.ndarray = cropper(img)  # type: ignore
+                results[i][key] = cropped
                 self.push_transform(results[i], key, extra_info={"center": center}, orig_size=orig_size)
                 if self.center_coord_key:
                     results[i][self.center_coord_key] = center
@@ -969,7 +993,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, n
 
         return results
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
         for key in self.key_iterator(d):
             transform = self.get_most_recent_transform(d, key)
@@ -1048,6 +1072,8 @@ class RandCropByPosNegLabeld(Randomizable, MapTransform, InvertibleTransform):
 
     """
 
+    backend = RandCropByPosNegLabel.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -1081,14 +1107,14 @@ def __init__(
         if len(self.keys) != len(self.meta_keys):
             raise ValueError("meta_keys should have the same length as keys.")
         self.meta_key_postfix = ensure_tuple_rep(meta_key_postfix, len(self.keys))
-        self.centers: Optional[List[List[np.ndarray]]] = None
+        self.centers: Optional[List[List[int]]] = None
 
     def randomize(
         self,
-        label: np.ndarray,
-        fg_indices: Optional[np.ndarray] = None,
-        bg_indices: Optional[np.ndarray] = None,
-        image: Optional[np.ndarray] = None,
+        label: NdarrayOrTensor,
+        fg_indices: Optional[NdarrayOrTensor] = None,
+        bg_indices: Optional[NdarrayOrTensor] = None,
+        image: Optional[NdarrayOrTensor] = None,
     ) -> None:
         self.spatial_size = fall_back_tuple(self.spatial_size, default=label.shape[1:])
         if fg_indices is None or bg_indices is None:
@@ -1100,7 +1126,7 @@ def randomize(
             self.spatial_size, self.num_samples, self.pos_ratio, label.shape[1:], fg_indices_, bg_indices_, self.R
         )
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, np.ndarray]]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> List[Dict[Hashable, NdarrayOrTensor]]:
         d = dict(data)
         label = d[self.label_key]
         image = d[self.image_key] if self.image_key else None
@@ -1114,7 +1140,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, n
             raise ValueError("no available ROI centers to crop.")
 
         # initialize returned list with shallow copy to preserve key ordering
-        results: List[Dict[Hashable, np.ndarray]] = [dict(d) for _ in range(self.num_samples)]
+        results: List[Dict[Hashable, NdarrayOrTensor]] = [dict(d) for _ in range(self.num_samples)]
 
         for i, center in enumerate(self.centers):
             # fill in the extra keys with unmodified data
@@ -1122,7 +1148,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, n
                 results[i][key] = deepcopy(d[key])
             for key in self.key_iterator(d):
                 img = d[key]
-                cropper = SpatialCrop(roi_center=tuple(center), roi_size=self.spatial_size)  # type: ignore
+                cropper = SpatialCrop(roi_center=tuple(center), roi_size=self.spatial_size)
                 orig_size = img.shape[1:]
                 results[i][key] = cropper(img)
                 self.push_transform(results[i], key, extra_info={"center": center}, orig_size=orig_size)
@@ -1131,11 +1157,11 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> List[Dict[Hashable, n
                 meta_key = meta_key or f"{key}_{meta_key_postfix}"
                 if meta_key not in results[i]:
                     results[i][meta_key] = {}  # type: ignore
-                results[i][meta_key][Key.PATCH_INDEX] = i
+                results[i][meta_key][Key.PATCH_INDEX] = i  # type: ignore
 
         return results
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
         for key in self.key_iterator(d):
             transform = self.get_most_recent_transform(d, key)
@@ -1234,6 +1260,8 @@ class RandCropByLabelClassesd(Randomizable, MapTransform, InvertibleTransform):
 
     """
 
+    backend = RandCropByLabelClasses.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -1262,16 +1290,15 @@ def __init__(
         if len(self.keys) != len(self.meta_keys):
             raise ValueError("meta_keys should have the same length as keys.")
         self.meta_key_postfix = ensure_tuple_rep(meta_key_postfix, len(self.keys))
-        self.centers: Optional[List[List[np.ndarray]]] = None
+        self.centers: Optional[List[List[int]]] = None
 
     def randomize(
         self,
-        label: np.ndarray,
-        indices: Optional[List[np.ndarray]] = None,
-        image: Optional[np.ndarray] = None,
+        label: NdarrayOrTensor,
+        indices: Optional[List[NdarrayOrTensor]] = None,
+        image: Optional[NdarrayOrTensor] = None,
     ) -> None:
         self.spatial_size = fall_back_tuple(self.spatial_size, default=label.shape[1:])
-        indices_: List[np.ndarray]
         if indices is None:
             indices_ = map_classes_to_indices(label, self.num_classes, image, self.image_threshold)
         else:
@@ -1280,7 +1307,7 @@ def randomize(
             self.spatial_size, self.num_samples, label.shape[1:], indices_, self.ratios, self.R
         )
 
-    def __call__(self, data: Mapping[Hashable, Any]) -> List[Dict[Hashable, np.ndarray]]:
+    def __call__(self, data: Mapping[Hashable, Any]) -> List[Dict[Hashable, NdarrayOrTensor]]:
         d = dict(data)
         label = d[self.label_key]
         image = d[self.image_key] if self.image_key else None
@@ -1293,7 +1320,7 @@ def __call__(self, data: Mapping[Hashable, Any]) -> List[Dict[Hashable, np.ndarr
             raise ValueError("no available ROI centers to crop.")
 
         # initialize returned list with shallow copy to preserve key ordering
-        results: List[Dict[Hashable, np.ndarray]] = [dict(d) for _ in range(self.num_samples)]
+        results: List[Dict[Hashable, NdarrayOrTensor]] = [dict(d) for _ in range(self.num_samples)]
 
         for i, center in enumerate(self.centers):
             # fill in the extra keys with unmodified data
@@ -1301,7 +1328,7 @@ def __call__(self, data: Mapping[Hashable, Any]) -> List[Dict[Hashable, np.ndarr
                 results[i][key] = deepcopy(d[key])
             for key in self.key_iterator(d):
                 img = d[key]
-                cropper = SpatialCrop(roi_center=tuple(center), roi_size=self.spatial_size)  # type: ignore
+                cropper = SpatialCrop(roi_center=tuple(center), roi_size=self.spatial_size)
                 orig_size = img.shape[1:]
                 results[i][key] = cropper(img)
                 self.push_transform(results[i], key, extra_info={"center": center}, orig_size=orig_size)
@@ -1310,11 +1337,11 @@ def __call__(self, data: Mapping[Hashable, Any]) -> List[Dict[Hashable, np.ndarr
                 meta_key = meta_key or f"{key}_{meta_key_postfix}"
                 if meta_key not in results[i]:
                     results[i][meta_key] = {}  # type: ignore
-                results[i][meta_key][Key.PATCH_INDEX] = i
+                results[i][meta_key][Key.PATCH_INDEX] = i  # type: ignore
 
         return results
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
         for key in self.key_iterator(d):
             transform = self.get_most_recent_transform(d, key)
@@ -1359,6 +1386,8 @@ class ResizeWithPadOrCropd(MapTransform, InvertibleTransform):
 
     """
 
+    backend = ResizeWithPadOrCrop.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -1372,7 +1401,7 @@ def __init__(
         self.mode = ensure_tuple_rep(mode, len(self.keys))
         self.padcropper = ResizeWithPadOrCrop(spatial_size=spatial_size, method=method, **np_kwargs)
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key, m in self.key_iterator(d, self.mode):
             orig_size = d[key].shape[1:]
@@ -1387,7 +1416,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda
             )
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
         for key in self.key_iterator(d):
             transform = self.get_most_recent_transform(d, key)
diff --git a/monai/transforms/intensity/array.py b/monai/transforms/intensity/array.py
index 20d306be04..376c2c811f 100644
--- a/monai/transforms/intensity/array.py
+++ b/monai/transforms/intensity/array.py
@@ -13,7 +13,7 @@
 https://github.com/Project-MONAI/MONAI/wiki/MONAI_Design
 """
 
-import copy
+from abc import abstractmethod
 from collections.abc import Iterable
 from functools import partial
 from typing import Any, Callable, List, Optional, Sequence, Tuple, Union
@@ -28,6 +28,7 @@
 from monai.networks.layers import GaussianFilter, HilbertTransform, SavitzkyGolayFilter
 from monai.transforms.transform import RandomizableTransform, Transform
 from monai.transforms.utils import Fourier, equalize_hist, is_positive, rescale_array
+from monai.transforms.utils_pytorch_numpy_unification import clip, percentile, where
 from monai.utils import (
     PT_BEFORE_1_7,
     InvalidPyTorchVersionError,
@@ -38,6 +39,7 @@
     ensure_tuple_size,
     fall_back_tuple,
 )
+from monai.utils.deprecated import deprecated_arg
 from monai.utils.enums import TransformBackends
 from monai.utils.type_conversion import convert_to_tensor, get_equivalent_dtype
 
@@ -69,9 +71,10 @@
     "RandGibbsNoise",
     "KSpaceSpikeNoise",
     "RandKSpaceSpikeNoise",
+    "RandCoarseTransform",
     "RandCoarseDropout",
+    "RandCoarseShuffle",
     "HistogramNormalize",
-    "LocalPatchShuffling",
 ]
 
 
@@ -87,27 +90,28 @@ class RandGaussianNoise(RandomizableTransform):
 
     backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
 
-    def __init__(self, prob: float = 0.1, mean: Union[Sequence[float], float] = 0.0, std: float = 0.1) -> None:
+    def __init__(self, prob: float = 0.1, mean: float = 0.0, std: float = 0.1) -> None:
         RandomizableTransform.__init__(self, prob)
         self.mean = mean
         self.std = std
-        self._noise: np.ndarray
 
-    def randomize(self, im_shape: Sequence[int]) -> None:
+    def randomize(self, data: Any) -> None:
         super().randomize(None)
-        self._noise = self.R.normal(self.mean, self.R.uniform(0, self.std), size=im_shape)
+        self._rand_std = self.R.uniform(0, self.std)
+
+    def _add_noise(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
+        noise = self.R.normal(self.mean, self._rand_std, size=img.shape)
+        noise_, *_ = convert_to_dst_type(noise, img)
+        return img + noise_
 
     def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         Apply the transform to `img`.
         """
-        self.randomize(img.shape)
-        if self._noise is None:
-            raise RuntimeError("randomized factor should not be None.")
+        self.randomize(None)
         if not self._do_transform:
             return img
-        noise, *_ = convert_to_dst_type(self._noise, img)
-        return img + noise
+        return self._add_noise(img)
 
 
 class RandRicianNoise(RandomizableTransform):
@@ -378,7 +382,12 @@ class ScaleIntensity(Transform):
     backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
 
     def __init__(
-        self, minv: Optional[float] = 0.0, maxv: Optional[float] = 1.0, factor: Optional[float] = None
+        self,
+        minv: Optional[float] = 0.0,
+        maxv: Optional[float] = 1.0,
+        factor: Optional[float] = None,
+        channel_wise: bool = False,
+        dtype: DtypeLike = np.float32,
     ) -> None:
         """
         Args:
@@ -386,10 +395,15 @@ def __init__(
             maxv: maximum value of output data.
             factor: factor scale by ``v = v * (1 + factor)``. In order to use
                 this parameter, please set `minv` and `maxv` into None.
+            channel_wise: if True, scale on each channel separately. Please ensure
+                that the first dimension represents the channel of the image if True.
+            dtype: output data type, defaults to float32.
         """
         self.minv = minv
         self.maxv = maxv
         self.factor = factor
+        self.channel_wise = channel_wise
+        self.dtype = dtype
 
     def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
@@ -400,11 +414,15 @@ def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
 
         """
         if self.minv is not None and self.maxv is not None:
-            return rescale_array(img, self.minv, self.maxv, img.dtype)
+            if self.channel_wise:
+                out = [rescale_array(d, self.minv, self.maxv, dtype=self.dtype) for d in img]
+                return torch.stack(out) if isinstance(img, torch.Tensor) else np.stack(out)  # type: ignore
+            else:
+                return rescale_array(img, self.minv, self.maxv, dtype=self.dtype)
         if self.factor is not None:
-            out = img * (1 + self.factor)
-            out, *_ = convert_data_type(out, dtype=img.dtype)
-            return out
+            ret = img * (1 + self.factor)
+            ret, *_ = convert_data_type(ret, dtype=self.dtype)
+            return ret
         raise ValueError("Incompatible values: minv=None or maxv=None and factor=None.")
 
 
@@ -416,12 +434,18 @@ class RandScaleIntensity(RandomizableTransform):
 
     backend = ScaleIntensity.backend
 
-    def __init__(self, factors: Union[Tuple[float, float], float], prob: float = 0.1) -> None:
+    def __init__(
+        self,
+        factors: Union[Tuple[float, float], float],
+        prob: float = 0.1,
+        dtype: DtypeLike = np.float32,
+    ) -> None:
         """
         Args:
             factors: factor range to randomly scale by ``v = v * (1 + factor)``.
                 if single number, factor value is picked from (-factors, factors).
             prob: probability of scale.
+            dtype: output data type, defaults to float32.
 
         """
         RandomizableTransform.__init__(self, prob)
@@ -432,6 +456,7 @@ def __init__(self, factors: Union[Tuple[float, float], float], prob: float = 0.1
         else:
             self.factors = (min(factors), max(factors))
         self.factor = self.factors[0]
+        self.dtype = dtype
 
     def randomize(self, data: Optional[Any] = None) -> None:
         self.factor = self.R.uniform(low=self.factors[0], high=self.factors[1])
@@ -444,7 +469,7 @@ def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         self.randomize()
         if not self._do_transform:
             return img
-        scaler = ScaleIntensity(minv=None, maxv=None, factor=self.factor)
+        scaler = ScaleIntensity(minv=None, maxv=None, factor=self.factor, dtype=self.dtype)
         return scaler(img)
 
 
@@ -516,6 +541,7 @@ def __call__(self, img: np.ndarray):
         """
         Apply the transform to `img`.
         """
+        img, *_ = convert_data_type(img, np.ndarray)  # type: ignore
         self.randomize(data=img)
         if not self._do_transform:
             return img
@@ -641,6 +667,8 @@ class ThresholdIntensity(Transform):
         cval: value to fill the remaining parts of the image, default is 0.
     """
 
+    backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
+
     def __init__(self, threshold: float, above: bool = True, cval: float = 0.0) -> None:
         if not isinstance(threshold, (int, float)):
             raise ValueError("threshold must be a float or int number.")
@@ -648,13 +676,14 @@ def __init__(self, threshold: float, above: bool = True, cval: float = 0.0) -> N
         self.above = above
         self.cval = cval
 
-    def __call__(self, img: np.ndarray) -> np.ndarray:
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         Apply the transform to `img`.
         """
-        return np.asarray(
-            np.where(img > self.threshold if self.above else img < self.threshold, img, self.cval), dtype=img.dtype
-        )
+        mask = img > self.threshold if self.above else img < self.threshold
+        res = where(mask, img, self.cval)
+        res, *_ = convert_data_type(res, dtype=img.dtype)
+        return res
 
 
 class ScaleIntensityRange(Transform):
@@ -670,6 +699,8 @@ class ScaleIntensityRange(Transform):
         clip: whether to perform clip after scaling.
     """
 
+    backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
+
     def __init__(self, a_min: float, a_max: float, b_min: float, b_max: float, clip: bool = False) -> None:
         self.a_min = a_min
         self.a_max = a_max
@@ -677,7 +708,7 @@ def __init__(self, a_min: float, a_max: float, b_min: float, b_max: float, clip:
         self.b_max = b_max
         self.clip = clip
 
-    def __call__(self, img: np.ndarray):
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         Apply the transform to `img`.
         """
@@ -688,7 +719,7 @@ def __call__(self, img: np.ndarray):
         img = (img - self.a_min) / (self.a_max - self.a_min)
         img = img * (self.b_max - self.b_min) + self.b_min
         if self.clip:
-            img = np.asarray(np.clip(img, self.b_min, self.b_max))
+            img = clip(img, self.b_min, self.b_max)
         return img
 
 
@@ -702,19 +733,22 @@ class AdjustContrast(Transform):
         gamma: gamma value to adjust the contrast as function.
     """
 
+    backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
+
     def __init__(self, gamma: float) -> None:
         if not isinstance(gamma, (int, float)):
             raise ValueError("gamma must be a float or int number.")
         self.gamma = gamma
 
-    def __call__(self, img: np.ndarray):
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         Apply the transform to `img`.
         """
         epsilon = 1e-7
         img_min = img.min()
         img_range = img.max() - img_min
-        return np.power(((img - img_min) / float(img_range + epsilon)), self.gamma) * img_range + img_min
+        ret: NdarrayOrTensor = ((img - img_min) / float(img_range + epsilon)) ** self.gamma * img_range + img_min
+        return ret
 
 
 class RandAdjustContrast(RandomizableTransform):
@@ -729,6 +763,8 @@ class RandAdjustContrast(RandomizableTransform):
             If single number, value is picked from (0.5, gamma), default is (0.5, 4.5).
     """
 
+    backend = AdjustContrast.backend
+
     def __init__(self, prob: float = 0.1, gamma: Union[Sequence[float], float] = (0.5, 4.5)) -> None:
         RandomizableTransform.__init__(self, prob)
 
@@ -749,7 +785,7 @@ def randomize(self, data: Optional[Any] = None) -> None:
         super().randomize(None)
         self.gamma_value = self.R.uniform(low=self.gamma[0], high=self.gamma[1])
 
-    def __call__(self, img: np.ndarray):
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         Apply the transform to `img`.
         """
@@ -817,6 +853,8 @@ class ScaleIntensityRangePercentiles(Transform):
         relative: whether to scale to the corresponding percentiles of [b_min, b_max].
     """
 
+    backend = ScaleIntensityRange.backend
+
     def __init__(
         self, lower: float, upper: float, b_min: float, b_max: float, clip: bool = False, relative: bool = False
     ) -> None:
@@ -831,12 +869,12 @@ def __init__(
         self.clip = clip
         self.relative = relative
 
-    def __call__(self, img: np.ndarray):
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         Apply the transform to `img`.
         """
-        a_min = np.percentile(img, self.lower)
-        a_max = np.percentile(img, self.upper)
+        a_min: float = percentile(img, self.lower)  # type: ignore
+        a_max: float = percentile(img, self.upper)  # type: ignore
         b_min = self.b_min
         b_max = self.b_max
 
@@ -848,7 +886,7 @@ def __call__(self, img: np.ndarray):
         img = scalar(img)
 
         if self.clip:
-            img = np.asarray(np.clip(img, self.b_min, self.b_max))
+            img = clip(img, self.b_min, self.b_max)
 
         return img
 
@@ -888,10 +926,13 @@ def __call__(self, img: np.ndarray, mask_data: Optional[np.ndarray] = None) -> n
             - ValueError: When ``mask_data`` and ``img`` channels differ and ``mask_data`` is not single channel.
 
         """
+        img, *_ = convert_data_type(img, np.ndarray)  # type: ignore
         mask_data = self.mask_data if mask_data is None else mask_data
         if mask_data is None:
             raise ValueError("must provide the mask_data when initializing the transform or at runtime.")
 
+        mask_data, *_ = convert_data_type(mask_data, np.ndarray)  # type: ignore
+
         mask_data = np.asarray(self.select_fn(mask_data))
         if mask_data.shape[0] != 1 and mask_data.shape[0] != img.shape[0]:
             raise ValueError(
@@ -914,7 +955,7 @@ class SavitzkyGolaySmooth(Transform):
             or ``'circular'``. Default: ``'zeros'``. See ``torch.nn.Conv1d()`` for more information.
     """
 
-    backend = [TransformBackends.NUMPY]
+    backend = [TransformBackends.TORCH]
 
     def __init__(self, window_length: int, order: int, axis: int = 1, mode: str = "zeros"):
 
@@ -927,7 +968,7 @@ def __init__(self, window_length: int, order: int, axis: int = 1, mode: str = "z
         self.mode = mode
         self.img_t: torch.Tensor = torch.tensor(0.0)
 
-    def __call__(self, img: NdarrayOrTensor) -> torch.Tensor:
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         Args:
             img: array containing input data. Must be real and in shape [channels, spatial1, spatial2, ...].
@@ -941,7 +982,9 @@ def __call__(self, img: NdarrayOrTensor) -> torch.Tensor:
         # add one to transform axis because a batch axis will be added at dimension 0
         savgol_filter = SavitzkyGolayFilter(self.window_length, self.order, self.axis + 1, self.mode)
         # convert to Tensor and add Batch axis expected by HilbertTransform
-        out: torch.Tensor = savgol_filter(self.img_t.unsqueeze(0)).squeeze(0)
+        smoothed = savgol_filter(self.img_t.unsqueeze(0)).squeeze(0)
+        out, *_ = convert_to_dst_type(smoothed, dst=img)
+
         return out
 
 
@@ -978,6 +1021,7 @@ def __call__(self, img: np.ndarray):
             np.ndarray containing envelope of data in img along the specified axis.
 
         """
+        img, *_ = convert_data_type(img, np.ndarray)  # type: ignore
         # add one to transform axis because a batch axis will be added at dimension 0
         hilbert_transform = HilbertTransform(self.axis + 1, self.n)
         # convert to Tensor and add Batch axis expected by HilbertTransform
@@ -999,14 +1043,24 @@ class GaussianSmooth(Transform):
 
     """
 
+    backend = [TransformBackends.TORCH]
+
     def __init__(self, sigma: Union[Sequence[float], float] = 1.0, approx: str = "erf") -> None:
         self.sigma = sigma
         self.approx = approx
 
-    def __call__(self, img: np.ndarray):
-        gaussian_filter = GaussianFilter(img.ndim - 1, self.sigma, approx=self.approx)
-        input_data = torch.as_tensor(np.ascontiguousarray(img), dtype=torch.float).unsqueeze(0)
-        return gaussian_filter(input_data).squeeze(0).detach().numpy()
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
+        img_t: torch.Tensor
+        img_t, *_ = convert_data_type(img, torch.Tensor, dtype=torch.float)  # type: ignore
+        sigma: Union[Sequence[torch.Tensor], torch.Tensor]
+        if isinstance(self.sigma, Sequence):
+            sigma = [torch.as_tensor(s, device=img_t.device) for s in self.sigma]
+        else:
+            sigma = torch.as_tensor(self.sigma, device=img_t.device)
+        gaussian_filter = GaussianFilter(img_t.ndim - 1, sigma, approx=self.approx)
+        out_t: torch.Tensor = gaussian_filter(img_t.unsqueeze(0)).squeeze(0)
+        out, *_ = convert_data_type(out_t, type(img), device=img.device if isinstance(img, torch.Tensor) else None)
+        return out
 
 
 class RandGaussianSmooth(RandomizableTransform):
@@ -1023,6 +1077,8 @@ class RandGaussianSmooth(RandomizableTransform):
 
     """
 
+    backend = GaussianSmooth.backend
+
     def __init__(
         self,
         sigma_x: Tuple[float, float] = (0.25, 1.5),
@@ -1047,9 +1103,10 @@ def randomize(self, data: Optional[Any] = None) -> None:
         self.y = self.R.uniform(low=self.sigma_y[0], high=self.sigma_y[1])
         self.z = self.R.uniform(low=self.sigma_z[0], high=self.sigma_z[1])
 
-    def __call__(self, img: np.ndarray):
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         self.randomize()
         if not self._do_transform:
+            img, *_ = convert_data_type(img, dtype=torch.float)
             return img
         sigma = ensure_tuple_size(tup=(self.x, self.y, self.z), dim=img.ndim - 1)
         return GaussianSmooth(sigma=sigma, approx=self.approx)(img)
@@ -1082,6 +1139,8 @@ class GaussianSharpen(Transform):
 
     """
 
+    backend = [TransformBackends.TORCH]
+
     def __init__(
         self,
         sigma1: Union[Sequence[float], float] = 3.0,
@@ -1094,13 +1153,19 @@ def __init__(
         self.alpha = alpha
         self.approx = approx
 
-    def __call__(self, img: np.ndarray):
-        gaussian_filter1 = GaussianFilter(img.ndim - 1, self.sigma1, approx=self.approx)
-        gaussian_filter2 = GaussianFilter(img.ndim - 1, self.sigma2, approx=self.approx)
-        input_data = torch.as_tensor(np.ascontiguousarray(img), dtype=torch.float).unsqueeze(0)
-        blurred_f = gaussian_filter1(input_data)
-        filter_blurred_f = gaussian_filter2(blurred_f)
-        return (blurred_f + self.alpha * (blurred_f - filter_blurred_f)).squeeze(0).detach().numpy()
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
+        img_t: torch.Tensor
+        img_t, *_ = convert_data_type(img, torch.Tensor, dtype=torch.float32)  # type: ignore
+
+        gf1, gf2 = (
+            GaussianFilter(img_t.ndim - 1, sigma, approx=self.approx).to(img_t.device)
+            for sigma in (self.sigma1, self.sigma2)
+        )
+        blurred_f = gf1(img_t.unsqueeze(0))
+        filter_blurred_f = gf2(blurred_f)
+        out_t: torch.Tensor = (blurred_f + self.alpha * (blurred_f - filter_blurred_f)).squeeze(0)
+        out, *_ = convert_data_type(out_t, type(img), device=img.device if isinstance(img, torch.Tensor) else None)
+        return out
 
 
 class RandGaussianSharpen(RandomizableTransform):
@@ -1125,6 +1190,8 @@ class RandGaussianSharpen(RandomizableTransform):
 
     """
 
+    backend = GaussianSharpen.backend
+
     def __init__(
         self,
         sigma1_x: Tuple[float, float] = (0.5, 1.0),
@@ -1160,9 +1227,11 @@ def randomize(self, data: Optional[Any] = None) -> None:
         self.z2 = self.R.uniform(low=sigma2_z[0], high=sigma2_z[1])
         self.a = self.R.uniform(low=self.alpha[0], high=self.alpha[1])
 
-    def __call__(self, img: np.ndarray):
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         self.randomize()
+        # if not doing, just need to convert to tensor
         if not self._do_transform:
+            img, *_ = convert_data_type(img, dtype=torch.float32)
             return img
         sigma1 = ensure_tuple_size(tup=(self.x1, self.y1, self.z1), dim=img.ndim - 1)
         sigma2 = ensure_tuple_size(tup=(self.x2, self.y2, self.z2), dim=img.ndim - 1)
@@ -1180,6 +1249,8 @@ class RandHistogramShift(RandomizableTransform):
         prob: probability of histogram shift.
     """
 
+    backend = [TransformBackends.NUMPY]
+
     def __init__(self, num_control_points: Union[Tuple[int, int], int] = 10, prob: float = 0.1) -> None:
         RandomizableTransform.__init__(self, prob)
 
@@ -1204,80 +1275,20 @@ def randomize(self, data: Optional[Any] = None) -> None:
                 self.floating_control_points[i - 1], self.floating_control_points[i + 1]
             )
 
-    def __call__(self, img: np.ndarray) -> np.ndarray:
+    def __call__(self, img: NdarrayOrTensor) -> np.ndarray:
+        img_np: np.ndarray
+        img_np, *_ = convert_data_type(img, np.ndarray)  # type: ignore
         self.randomize()
         if not self._do_transform:
-            return img
-        img_min, img_max = img.min(), img.max()
+            return img_np
+        img_min, img_max = img_np.min(), img_np.max()
         reference_control_points_scaled = self.reference_control_points * (img_max - img_min) + img_min
         floating_control_points_scaled = self.floating_control_points * (img_max - img_min) + img_min
         return np.asarray(
-            np.interp(img, reference_control_points_scaled, floating_control_points_scaled), dtype=img.dtype
+            np.interp(img_np, reference_control_points_scaled, floating_control_points_scaled), dtype=img_np.dtype
         )
 
 
-class RandGibbsNoise(RandomizableTransform):
-    """
-    Naturalistic image augmentation via Gibbs artifacts. The transform
-    randomly applies Gibbs noise to 2D/3D MRI images. Gibbs artifacts
-    are one of the common type of type artifacts appearing in MRI scans.
-
-    The transform is applied to all the channels in the data.
-
-    For general information on Gibbs artifacts, please refer to:
-    https://pubs.rsna.org/doi/full/10.1148/rg.313105115
-    https://pubs.rsna.org/doi/full/10.1148/radiographics.22.4.g02jl14949
-
-
-    Args:
-        prob (float): probability of applying the transform.
-        alpha (float, Sequence(float)): Parametrizes the intensity of the Gibbs noise filter applied. Takes
-            values in the interval [0,1] with alpha = 0 acting as the identity mapping.
-            If a length-2 list is given as [a,b] then the value of alpha will be
-            sampled uniformly from the interval [a,b]. 0 <= a <= b <= 1.
-        as_tensor_output: if true return torch.Tensor, else return np.array. default: True.
-    """
-
-    def __init__(self, prob: float = 0.1, alpha: Sequence[float] = (0.0, 1.0), as_tensor_output: bool = True) -> None:
-
-        if len(alpha) != 2:
-            raise ValueError("alpha length must be 2.")
-        if alpha[1] > 1 or alpha[0] < 0:
-            raise ValueError("alpha must take values in the interval [0,1]")
-        if alpha[0] > alpha[1]:
-            raise ValueError("When alpha = [a,b] we need a < b.")
-
-        self.alpha = alpha
-        self.sampled_alpha = -1.0  # stores last alpha sampled by randomize()
-        self.as_tensor_output = as_tensor_output
-
-        RandomizableTransform.__init__(self, prob=prob)
-
-    def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor, np.ndarray]:
-
-        # randomize application and possibly alpha
-        self._randomize(None)
-
-        if self._do_transform:
-            # apply transform
-            transform = GibbsNoise(self.sampled_alpha, self.as_tensor_output)
-            img = transform(img)
-        else:
-            if isinstance(img, np.ndarray) and self.as_tensor_output:
-                img = torch.Tensor(img)
-            elif isinstance(img, torch.Tensor) and not self.as_tensor_output:
-                img = img.detach().cpu().numpy()
-        return img
-
-    def _randomize(self, _: Any) -> None:
-        """
-        (1) Set random variable to apply the transform.
-        (2) Get alpha from uniform distribution.
-        """
-        super().randomize(None)
-        self.sampled_alpha = self.R.uniform(self.alpha[0], self.alpha[1])
-
-
 class GibbsNoise(Transform, Fourier):
     """
     The transform applies Gibbs noise to 2D/3D MRI images. Gibbs artifacts
@@ -1296,21 +1307,20 @@ class GibbsNoise(Transform, Fourier):
     Args:
         alpha: Parametrizes the intensity of the Gibbs noise filter applied. Takes
             values in the interval [0,1] with alpha = 0 acting as the identity mapping.
-        as_tensor_output: if true return torch.Tensor, else return np.array. Default: True.
     """
 
+    backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(self, alpha: float = 0.5, as_tensor_output: bool = True) -> None:
 
         if alpha > 1 or alpha < 0:
             raise ValueError("alpha must take values in the interval [0,1].")
         self.alpha = alpha
-        self.as_tensor_output = as_tensor_output
 
-    def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor, np.ndarray]:
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         n_dims = len(img.shape[1:])
 
-        if isinstance(img, np.ndarray):
-            img = torch.Tensor(img)
         # FT
         k = self.shift_fourier(img, n_dims)
         # build and apply mask
@@ -1318,13 +1328,13 @@ def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor,
         # map back
         img = self.inv_shift_fourier(k, n_dims)
 
-        return img if self.as_tensor_output else img.cpu().detach().numpy()
+        return img
 
-    def _apply_mask(self, k: torch.Tensor) -> torch.Tensor:
+    def _apply_mask(self, k: NdarrayOrTensor) -> NdarrayOrTensor:
         """Builds and applies a mask on the spatial dimensions.
 
         Args:
-            k (np.ndarray): k-space version of the image.
+            k: k-space version of the image.
         Returns:
             masked version of the k-space image.
         """
@@ -1345,11 +1355,73 @@ def _apply_mask(self, k: torch.Tensor) -> torch.Tensor:
         # add channel dimension into mask
         mask = np.repeat(mask[None], k.shape[0], axis=0)
 
+        if isinstance(k, torch.Tensor):
+            mask, *_ = convert_data_type(mask, torch.Tensor, device=k.device)
+
         # apply binary mask
-        k_masked = k * torch.tensor(mask, device=k.device)
+        k_masked: NdarrayOrTensor
+        k_masked = k * mask
         return k_masked
 
 
+class RandGibbsNoise(RandomizableTransform):
+    """
+    Naturalistic image augmentation via Gibbs artifacts. The transform
+    randomly applies Gibbs noise to 2D/3D MRI images. Gibbs artifacts
+    are one of the common type of type artifacts appearing in MRI scans.
+
+    The transform is applied to all the channels in the data.
+
+    For general information on Gibbs artifacts, please refer to:
+    https://pubs.rsna.org/doi/full/10.1148/rg.313105115
+    https://pubs.rsna.org/doi/full/10.1148/radiographics.22.4.g02jl14949
+
+
+    Args:
+        prob (float): probability of applying the transform.
+        alpha (Sequence(float)): Parametrizes the intensity of the Gibbs noise filter applied. Takes
+            values in the interval [0,1] with alpha = 0 acting as the identity mapping.
+            If a length-2 list is given as [a,b] then the value of alpha will be
+            sampled uniformly from the interval [a,b]. 0 <= a <= b <= 1.
+    """
+
+    backend = GibbsNoise.backend
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
+    def __init__(self, prob: float = 0.1, alpha: Sequence[float] = (0.0, 1.0), as_tensor_output: bool = True) -> None:
+
+        if len(alpha) != 2:
+            raise ValueError("alpha length must be 2.")
+        if alpha[1] > 1 or alpha[0] < 0:
+            raise ValueError("alpha must take values in the interval [0,1]")
+        if alpha[0] > alpha[1]:
+            raise ValueError("When alpha = [a,b] we need a < b.")
+
+        self.alpha = alpha
+        self.sampled_alpha = -1.0  # stores last alpha sampled by randomize()
+
+        RandomizableTransform.__init__(self, prob=prob)
+
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
+
+        # randomize application and possibly alpha
+        self._randomize(None)
+
+        if self._do_transform:
+            # apply transform
+            transform = GibbsNoise(self.sampled_alpha)
+            img = transform(img)
+        return img
+
+    def _randomize(self, _: Any) -> None:
+        """
+        (1) Set random variable to apply the transform.
+        (2) Get alpha from uniform distribution.
+        """
+        super().randomize(None)
+        self.sampled_alpha = self.R.uniform(self.alpha[0], self.alpha[1])
+
+
 class KSpaceSpikeNoise(Transform, Fourier):
     """
     Apply localized spikes in `k`-space at the given locations and intensities.
@@ -1377,8 +1449,6 @@ class KSpaceSpikeNoise(Transform, Fourier):
             receive a sequence of intensities. This value should be tested as it is
             data-dependent. The default values are the 2.5 the mean of the
             log-intensity for each channel.
-        as_tensor_output: if ``True`` return torch.Tensor, else return np.array.
-            Default: ``True``.
 
     Example:
         When working with 4D data, ``KSpaceSpikeNoise(loc = ((3,60,64,32), (64,60,32)), k_intensity = (13,14))``
@@ -1387,6 +1457,9 @@ class KSpaceSpikeNoise(Transform, Fourier):
         with `log-intensity = 14`.
     """
 
+    backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         loc: Union[Tuple, Sequence[Tuple]],
@@ -1395,7 +1468,6 @@ def __init__(
     ):
 
         self.loc = ensure_tuple(loc)
-        self.as_tensor_output = as_tensor_output
         self.k_intensity = k_intensity
 
         # assert one-to-one relationship between factors and locations
@@ -1409,7 +1481,7 @@ def __init__(
         if isinstance(self.loc[0], Sequence) and k_intensity is not None and not isinstance(self.k_intensity, Sequence):
             raise ValueError("There must be one intensity_factor value for each tuple of indices in loc.")
 
-    def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor, np.ndarray]:
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         Args:
             img: image with dimensions (C, H, W) or (C, H, W, D)
@@ -1421,22 +1493,21 @@ def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor,
             raise RuntimeError("Image needs a channel direction.")
         if isinstance(self.loc[0], int) and len(img.shape) == 4 and len(self.loc) == 2:
             raise RuntimeError("Input images of dimension 4 need location tuple to be length 3 or 4")
-        if isinstance(self.loc[0], Sequence) and len(img.shape) == 4 and min(map(lambda x: len(x), self.loc)) == 2:
+        if isinstance(self.loc[0], Sequence) and len(img.shape) == 4 and min(map(len, self.loc)) == 2:
             raise RuntimeError("Input images of dimension 4 need location tuple to be length 3 or 4")
 
         n_dims = len(img.shape[1:])
 
-        if isinstance(img, np.ndarray):
-            img = torch.Tensor(img)
         # FT
         k = self.shift_fourier(img, n_dims)
-        log_abs = torch.log(torch.absolute(k) + 1e-10)
-        phase = torch.angle(k)
+        lib = np if isinstance(k, np.ndarray) else torch
+        log_abs = lib.log(lib.abs(k) + 1e-10)  # type: ignore
+        phase = lib.angle(k)  # type: ignore
 
         k_intensity = self.k_intensity
         # default log intensity
         if k_intensity is None:
-            k_intensity = tuple(torch.mean(log_abs, dim=tuple(range(-n_dims, 0))) * 2.5)
+            k_intensity = tuple(lib.mean(log_abs, axis=tuple(range(-n_dims, 0))) * 2.5)  # type: ignore
 
         # highlight
         if isinstance(self.loc[0], Sequence):
@@ -1445,10 +1516,10 @@ def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor,
         else:
             self._set_spike(log_abs, self.loc, k_intensity)
         # map back
-        k = torch.exp(log_abs) * torch.exp(1j * phase)
-        img = self.inv_shift_fourier(k, n_dims)
+        k = lib.exp(log_abs) * lib.exp(1j * phase)  # type: ignore
+        img, *_ = convert_to_dst_type(self.inv_shift_fourier(k, n_dims), dst=img)
 
-        return img if self.as_tensor_output else img.cpu().detach().numpy()
+        return img
 
     def _check_indices(self, img) -> None:
         """Helper method to check consistency of self.loc and input image.
@@ -1468,7 +1539,7 @@ def _check_indices(self, img) -> None:
                     f"The index value at position {i} of one of the tuples in loc = {self.loc} is out of bounds for current image."
                 )
 
-    def _set_spike(self, k: torch.Tensor, idx: Tuple, val: Union[Sequence[float], float]):
+    def _set_spike(self, k: NdarrayOrTensor, idx: Tuple, val: Union[Sequence[float], float]):
         """
         Helper function to introduce a given intensity at given location.
 
@@ -1514,8 +1585,6 @@ class RandKSpaceSpikeNoise(RandomizableTransform, Fourier):
             log-intensity for each channel.
         channel_wise: treat each channel independently. True by
             default.
-        as_tensor_output: if True return torch.Tensor, else
-            return np.array. default: True.
 
     Example:
         To apply `k`-space spikes randomly with probability 0.5, and
@@ -1524,6 +1593,9 @@ class RandKSpaceSpikeNoise(RandomizableTransform, Fourier):
         ``RandKSpaceSpikeNoise(prob=0.5, intensity_range=(11, 12), channel_wise=True)``
     """
 
+    backend = KSpaceSpikeNoise.backend
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         prob: float = 0.1,
@@ -1534,7 +1606,6 @@ def __init__(
 
         self.intensity_range = intensity_range
         self.channel_wise = channel_wise
-        self.as_tensor_output = as_tensor_output
         self.sampled_k_intensity: List = []
         self.sampled_locs: List[Tuple] = []
 
@@ -1543,7 +1614,7 @@ def __init__(
 
         super().__init__(prob)
 
-    def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor, np.ndarray]:
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         Apply transform to `img`. Assumes data is in channel-first form.
 
@@ -1562,20 +1633,18 @@ def __call__(self, img: Union[np.ndarray, torch.Tensor]) -> Union[torch.Tensor,
         self.sampled_k_intensity = []
         self.sampled_locs = []
 
-        if not isinstance(img, torch.Tensor):
-            img = torch.Tensor(img)
-
         intensity_range = self._make_sequence(img)
         self._randomize(img, intensity_range)
 
         # build/appy transform only if there are spike locations
         if self.sampled_locs:
-            transform = KSpaceSpikeNoise(self.sampled_locs, self.sampled_k_intensity, self.as_tensor_output)
-            return transform(img)
+            transform = KSpaceSpikeNoise(self.sampled_locs, self.sampled_k_intensity)
+            out: NdarrayOrTensor = transform(img)
+            return out
 
-        return img if self.as_tensor_output else img.detach().numpy()
+        return img
 
-    def _randomize(self, img: torch.Tensor, intensity_range: Sequence[Sequence[float]]) -> None:
+    def _randomize(self, img: NdarrayOrTensor, intensity_range: Sequence[Sequence[float]]) -> None:
         """
         Helper method to sample both the location and intensity of the spikes.
         When not working channel wise (channel_wise=False) it use the random
@@ -1603,7 +1672,7 @@ def _randomize(self, img: torch.Tensor, intensity_range: Sequence[Sequence[float
                 else:
                     self.sampled_k_intensity = [self.R.uniform(intensity_range[0], intensity_range[1])] * len(img)
 
-    def _make_sequence(self, x: torch.Tensor) -> Sequence[Sequence[float]]:
+    def _make_sequence(self, x: NdarrayOrTensor) -> Sequence[Sequence[float]]:
         """
         Formats the sequence of intensities ranges to Sequence[Sequence[float]].
         """
@@ -1615,7 +1684,7 @@ def _make_sequence(self, x: torch.Tensor) -> Sequence[Sequence[float]]:
             return (ensure_tuple(self.intensity_range),) * x.shape[0]
         return ensure_tuple(self.intensity_range)
 
-    def _set_default_range(self, img: torch.Tensor) -> Sequence[Sequence[float]]:
+    def _set_default_range(self, img: NdarrayOrTensor) -> Sequence[Sequence[float]]:
         """
         Sets default intensity ranges to be sampled.
 
@@ -1625,18 +1694,17 @@ def _set_default_range(self, img: torch.Tensor) -> Sequence[Sequence[float]]:
         n_dims = len(img.shape[1:])
 
         k = self.shift_fourier(img, n_dims)
-        log_abs = torch.log(torch.absolute(k) + 1e-10)
-        shifted_means = torch.mean(log_abs, dim=tuple(range(-n_dims, 0))) * 2.5
+        mod = torch if isinstance(k, torch.Tensor) else np
+        log_abs = mod.log(mod.absolute(k) + 1e-10)  # type: ignore
+        shifted_means = mod.mean(log_abs, dim=tuple(range(-n_dims, 0))) * 2.5  # type: ignore
         return tuple((i * 0.95, i * 1.1) for i in shifted_means)
 
 
-class RandCoarseDropout(RandomizableTransform):
+class RandCoarseTransform(RandomizableTransform):
     """
-    Randomly coarse dropout regions in the image, then fill in the rectangular regions with specified value.
-    Or keep the rectangular regions and fill in the other areas with specified value.
-    Refer to papers: https://arxiv.org/abs/1708.04552, https://arxiv.org/pdf/1604.07379
-    And other implementation: https://albumentations.ai/docs/api_reference/augmentations/transforms/
-    #albumentations.augmentations.transforms.CoarseDropout.
+    Randomly select coarse regions in the image, then execute transform operations for the regions.
+    It's the base class of all kinds of region transforms.
+    Refer to papers: https://arxiv.org/abs/1708.04552
 
     Args:
         holes: number of regions to dropout, if `max_holes` is not None, use this arg as the minimum number to
@@ -1646,12 +1714,6 @@ class RandCoarseDropout(RandomizableTransform):
             if some components of the `spatial_size` are non-positive values, the transform will use the
             corresponding components of input img size. For example, `spatial_size=(32, -1)` will be adapted
             to `(32, 64)` if the second spatial dimension size of img is `64`.
-        dropout_holes: if `True`, dropout the regions of holes and fill value, if `False`, keep the holes and
-            dropout the outside and fill value. default to `True`.
-        fill_value: target value to fill the dropout regions, if providing a number, will use it as constant
-            value to fill all the regions. if providing a tuple for the `min` and `max`, will randomly select
-            value for every pixel / voxel from the range `[min, max)`. if None, will compute the `min` and `max`
-            value of input image then randomly select value to fill, default to None.
         max_holes: if not None, define the maximum number to randomly select the expected number of regions.
         max_spatial_size: if not None, define the maximum spatial size to randomly select size for every region.
             if some components of the `max_spatial_size` are non-positive values, the transform will use the
@@ -1665,8 +1727,6 @@ def __init__(
         self,
         holes: int,
         spatial_size: Union[Sequence[int], int],
-        dropout_holes: bool = True,
-        fill_value: Optional[Union[Tuple[float, float], float]] = None,
         max_holes: Optional[int] = None,
         max_spatial_size: Optional[Union[Sequence[int], int]] = None,
         prob: float = 0.1,
@@ -1676,11 +1736,6 @@ def __init__(
             raise ValueError("number of holes must be greater than 0.")
         self.holes = holes
         self.spatial_size = spatial_size
-        self.dropout_holes = dropout_holes
-        if isinstance(fill_value, (tuple, list)):
-            if len(fill_value) != 2:
-                raise ValueError("fill value should contain 2 numbers if providing the `min` and `max`.")
-        self.fill_value = fill_value
         self.max_holes = max_holes
         self.max_spatial_size = max_spatial_size
         self.hole_coords: List = []
@@ -1697,28 +1752,142 @@ def randomize(self, img_size: Sequence[int]) -> None:
             valid_size = get_valid_patch_size(img_size, size)
             self.hole_coords.append((slice(None),) + get_random_patch(img_size, valid_size, self.R))
 
+    @abstractmethod
+    def _transform_holes(self, img: np.ndarray) -> np.ndarray:
+        """
+        Transform the randomly selected `self.hole_coords` in input images.
+
+        """
+        raise NotImplementedError(f"Subclass {self.__class__.__name__} must implement this method.")
+
     def __call__(self, img: np.ndarray):
+        img, *_ = convert_data_type(img, np.ndarray)  # type: ignore
         self.randomize(img.shape[1:])
-        ret = img
         if self._do_transform:
-            fill_value = (img.min(), img.max()) if self.fill_value is None else self.fill_value
-
-            if self.dropout_holes:
-                for h in self.hole_coords:
-                    if isinstance(fill_value, (tuple, list)):
-                        ret[h] = self.R.uniform(fill_value[0], fill_value[1], size=img[h].shape)
-                    else:
-                        ret[h] = fill_value
-            else:
+            img = self._transform_holes(img=img)
+
+        return img
+
+
+class RandCoarseDropout(RandCoarseTransform):
+    """
+    Randomly coarse dropout regions in the image, then fill in the rectangular regions with specified value.
+    Or keep the rectangular regions and fill in the other areas with specified value.
+    Refer to papers: https://arxiv.org/abs/1708.04552, https://arxiv.org/pdf/1604.07379
+    And other implementation: https://albumentations.ai/docs/api_reference/augmentations/transforms/
+    #albumentations.augmentations.transforms.CoarseDropout.
+
+    Args:
+        holes: number of regions to dropout, if `max_holes` is not None, use this arg as the minimum number to
+            randomly select the expected number of regions.
+        spatial_size: spatial size of the regions to dropout, if `max_spatial_size` is not None, use this arg
+            as the minimum spatial size to randomly select size for every region.
+            if some components of the `spatial_size` are non-positive values, the transform will use the
+            corresponding components of input img size. For example, `spatial_size=(32, -1)` will be adapted
+            to `(32, 64)` if the second spatial dimension size of img is `64`.
+        dropout_holes: if `True`, dropout the regions of holes and fill value, if `False`, keep the holes and
+            dropout the outside and fill value. default to `True`.
+        fill_value: target value to fill the dropout regions, if providing a number, will use it as constant
+            value to fill all the regions. if providing a tuple for the `min` and `max`, will randomly select
+            value for every pixel / voxel from the range `[min, max)`. if None, will compute the `min` and `max`
+            value of input image then randomly select value to fill, default to None.
+        max_holes: if not None, define the maximum number to randomly select the expected number of regions.
+        max_spatial_size: if not None, define the maximum spatial size to randomly select size for every region.
+            if some components of the `max_spatial_size` are non-positive values, the transform will use the
+            corresponding components of input img size. For example, `max_spatial_size=(32, -1)` will be adapted
+            to `(32, 64)` if the second spatial dimension size of img is `64`.
+        prob: probability of applying the transform.
+
+    """
+
+    def __init__(
+        self,
+        holes: int,
+        spatial_size: Union[Sequence[int], int],
+        dropout_holes: bool = True,
+        fill_value: Optional[Union[Tuple[float, float], float]] = None,
+        max_holes: Optional[int] = None,
+        max_spatial_size: Optional[Union[Sequence[int], int]] = None,
+        prob: float = 0.1,
+    ) -> None:
+        super().__init__(
+            holes=holes,
+            spatial_size=spatial_size,
+            max_holes=max_holes,
+            max_spatial_size=max_spatial_size,
+            prob=prob,
+        )
+        self.dropout_holes = dropout_holes
+        if isinstance(fill_value, (tuple, list)):
+            if len(fill_value) != 2:
+                raise ValueError("fill value should contain 2 numbers if providing the `min` and `max`.")
+        self.fill_value = fill_value
+
+    def _transform_holes(self, img: np.ndarray):
+        """
+        Fill the randomly selected `self.hole_coords` in input images.
+        Please note that we usually only use `self.R` in `randomize()` method, here is a special case.
+
+        """
+        fill_value = (img.min(), img.max()) if self.fill_value is None else self.fill_value
+
+        if self.dropout_holes:
+            for h in self.hole_coords:
                 if isinstance(fill_value, (tuple, list)):
-                    ret = self.R.uniform(fill_value[0], fill_value[1], size=img.shape).astype(img.dtype)
+                    img[h] = self.R.uniform(fill_value[0], fill_value[1], size=img[h].shape)
                 else:
-                    ret = np.full_like(img, fill_value)
-                for h in self.hole_coords:
-                    ret[h] = img[h]
+                    img[h] = fill_value
+            ret = img
+        else:
+            if isinstance(fill_value, (tuple, list)):
+                ret = self.R.uniform(fill_value[0], fill_value[1], size=img.shape).astype(img.dtype)
+            else:
+                ret = np.full_like(img, fill_value)
+            for h in self.hole_coords:
+                ret[h] = img[h]
         return ret
 
 
+class RandCoarseShuffle(RandCoarseTransform):
+    """
+    Randomly select regions in the image, then shuffle the pixels within every region.
+    It shuffles every channel separately.
+    Refer to paper:
+    Kang, Guoliang, et al. "Patchshuffle regularization." arXiv preprint arXiv:1707.07103 (2017).
+    https://arxiv.org/abs/1707.07103
+
+    Args:
+        holes: number of regions to dropout, if `max_holes` is not None, use this arg as the minimum number to
+            randomly select the expected number of regions.
+        spatial_size: spatial size of the regions to dropout, if `max_spatial_size` is not None, use this arg
+            as the minimum spatial size to randomly select size for every region.
+            if some components of the `spatial_size` are non-positive values, the transform will use the
+            corresponding components of input img size. For example, `spatial_size=(32, -1)` will be adapted
+            to `(32, 64)` if the second spatial dimension size of img is `64`.
+        max_holes: if not None, define the maximum number to randomly select the expected number of regions.
+        max_spatial_size: if not None, define the maximum spatial size to randomly select size for every region.
+            if some components of the `max_spatial_size` are non-positive values, the transform will use the
+            corresponding components of input img size. For example, `max_spatial_size=(32, -1)` will be adapted
+            to `(32, 64)` if the second spatial dimension size of img is `64`.
+        prob: probability of applying the transform.
+
+    """
+
+    def _transform_holes(self, img: np.ndarray):
+        """
+        Shuffle the content of randomly selected `self.hole_coords` in input images.
+        Please note that we usually only use `self.R` in `randomize()` method, here is a special case.
+
+        """
+        for h in self.hole_coords:
+            # shuffle every channel separately
+            for i, c in enumerate(img[h]):
+                patch_channel = c.flatten()
+                self.R.shuffle(patch_channel)
+                img[h][i] = patch_channel.reshape(c.shape)
+        return img
+
+
 class HistogramNormalize(Transform):
     """
     Apply the histogram normalization to input image.
@@ -1736,12 +1905,14 @@ class HistogramNormalize(Transform):
 
     """
 
+    backend = [TransformBackends.NUMPY]
+
     def __init__(
         self,
         num_bins: int = 256,
         min: int = 0,
         max: int = 255,
-        mask: Optional[np.ndarray] = None,
+        mask: Optional[NdarrayOrTensor] = None,
         dtype: DtypeLike = np.float32,
     ) -> None:
         self.num_bins = num_bins
@@ -1750,7 +1921,7 @@ def __init__(
         self.mask = mask
         self.dtype = dtype
 
-    def __call__(self, img: np.ndarray, mask: Optional[np.ndarray] = None) -> np.ndarray:
+    def __call__(self, img: NdarrayOrTensor, mask: Optional[NdarrayOrTensor] = None) -> np.ndarray:
         return equalize_hist(
             img=img,
             mask=mask if mask is not None else self.mask,
@@ -1759,95 +1930,3 @@ def __call__(self, img: np.ndarray, mask: Optional[np.ndarray] = None) -> np.nda
             max=self.max,
             dtype=self.dtype,
         )
-
-
-class LocalPatchShuffling(RandomizableTransform):
-    """
-    Takes a 3D image and based on input of the local patch size, shuffles the pixels of the local patch within it.
-    This process is repeated a for N number of times where every time a different random block is selected for local
-    pixel shuffling.
-
-    Kang, Guoliang, et al. "Patchshuffle regularization." arXiv preprint arXiv:1707.07103 (2017).
-    """
-
-    def __init__(
-        self,
-        prob: float = 1.0,
-        number_blocks: int = 1000,
-        blocksize_ratio: int = 10,
-        channel_wise: bool = True,
-        device: Optional[torch.device] = None,
-        image_only: bool = False,
-    ) -> None:
-        """
-        Args:
-            prob: The chance of this transform occuring on the given volume.
-            number_blocks: Total number of time a random 3D block will be selected for local shuffling of pixels/voxels
-                contained in the block.
-            blocksize_ratio: This ratio can be used to estimate the local 3D block sizes that will be selected.
-            channel_wise: If True, treats each channel of the image separately.
-            device: device on which the tensor will be allocated.
-            image_only: if True return only the image volume, otherwise return (image, affine).
-        """
-        RandomizableTransform.__init__(self, prob)
-        self.prob = prob
-        self.number_blocks = number_blocks
-        self.blocksize_ratio = blocksize_ratio
-        self.channel_wise = channel_wise
-
-    def _local_patch_shuffle(self, img: Union[torch.Tensor, np.ndarray], number_blocks: int, blocksize_ratio: int):
-        im_shape = img.shape
-        img_copy = copy.deepcopy(img)
-        for _each_block in range(number_blocks):
-
-            block_size_x = self.R.randint(1, im_shape[0] // blocksize_ratio)
-            block_size_y = self.R.randint(1, im_shape[1] // blocksize_ratio)
-            block_size_z = self.R.randint(1, im_shape[2] // blocksize_ratio)
-
-            noise_x = self.R.randint(0, im_shape[0] - block_size_x)
-            noise_y = self.R.randint(0, im_shape[1] - block_size_y)
-            noise_z = self.R.randint(0, im_shape[2] - block_size_z)
-
-            local_patch = img[
-                noise_x : noise_x + block_size_x,
-                noise_y : noise_y + block_size_y,
-                noise_z : noise_z + block_size_z,
-            ]
-
-            local_patch = local_patch.flatten()
-            self.R.shuffle(local_patch)
-            local_patch = local_patch.reshape((block_size_x, block_size_y, block_size_z))
-
-            img_copy[
-                noise_x : noise_x + block_size_x, noise_y : noise_y + block_size_y, noise_z : noise_z + block_size_z
-            ] = local_patch
-
-        shuffled_image = img_copy
-        return shuffled_image
-
-    def __call__(
-        self,
-        img: Union[np.ndarray, torch.Tensor],
-        # spatial_size: Optional[Union[Sequence[int], int]] = None,
-        # mode: Optional[Union[GridSampleMode, str]] = None,
-        # padding_mode: Optional[Union[GridSamplePadMode, str]] = None,
-    ):
-        """
-        Args:
-            img: shape must be (num_channels, H, W[, D]),
-
-        """
-
-        super().randomize(None)
-        if not self._do_transform:
-            return img
-
-        if self.channel_wise:
-            # img = self._local_patch_shuffle(img=img)
-            for i, _d in enumerate(img):
-                img[i] = self._local_patch_shuffle(
-                    img=img[i], blocksize_ratio=self.blocksize_ratio, number_blocks=self.number_blocks
-                )
-        else:
-            raise AssertionError("If channel_wise is False, the image needs to be set to channel first")
-        return img
diff --git a/monai/transforms/intensity/dictionary.py b/monai/transforms/intensity/dictionary.py
index bc53fb6b7b..8681093168 100644
--- a/monai/transforms/intensity/dictionary.py
+++ b/monai/transforms/intensity/dictionary.py
@@ -34,6 +34,7 @@
     NormalizeIntensity,
     RandBiasField,
     RandCoarseDropout,
+    RandCoarseShuffle,
     RandGaussianNoise,
     RandKSpaceSpikeNoise,
     RandRicianNoise,
@@ -47,6 +48,9 @@
 from monai.transforms.transform import MapTransform, Randomizable, RandomizableTransform
 from monai.transforms.utils import is_positive
 from monai.utils import convert_to_dst_type, ensure_tuple, ensure_tuple_rep, ensure_tuple_size
+from monai.utils.deprecated import deprecated_arg
+from monai.utils.enums import TransformBackends
+from monai.utils.type_conversion import convert_data_type
 
 __all__ = [
     "RandGaussianNoised",
@@ -75,6 +79,7 @@
     "RandKSpaceSpikeNoised",
     "RandHistogramShiftd",
     "RandCoarseDropoutd",
+    "RandCoarseShuffled",
     "HistogramNormalized",
     "RandGaussianNoiseD",
     "RandGaussianNoiseDict",
@@ -126,6 +131,8 @@
     "RandRicianNoiseDict",
     "RandCoarseDropoutD",
     "RandCoarseDropoutDict",
+    "RandCoarseShuffleD",
+    "RandCoarseShuffleDict",
     "HistogramNormalizeD",
     "HistogramNormalizeDict",
 ]
@@ -162,24 +169,19 @@ def __init__(
         self.std = std
         self._noise: List[np.ndarray] = []
 
-    def randomize(self, im_shape: Sequence[int]) -> None:
-        super().randomize(None)
-        self._noise.clear()
-        for m in self.mean:
-            self._noise.append(self.R.normal(m, self.R.uniform(0, self.std), size=im_shape))
+    def _add_noise(self, img: NdarrayTensor, mean: float) -> NdarrayTensor:
+        noise = self.R.normal(mean, self.R.uniform(0, self.std), size=img.shape)
+        noise_, *_ = convert_to_dst_type(noise, img)
+        return img + noise_
 
     def __call__(self, data: Mapping[Hashable, NdarrayTensor]) -> Dict[Hashable, NdarrayTensor]:
         d = dict(data)
-
-        image_shape = d[self.keys[0]].shape  # image shape from the first data key
-        self.randomize(image_shape)
-        if len(self._noise) != len(self.keys):
-            raise RuntimeError("inconsistent noise items and keys.")
+        super().randomize(None)
         if not self._do_transform:
             return d
-        for key, noise in self.key_iterator(d, self._noise):
-            noise, *_ = convert_to_dst_type(noise, d[key])
-            d[key] = d[key] + noise
+
+        for key, mean in self.key_iterator(d, self.mean):
+            d[key] = self._add_noise(img=d[key], mean=mean)
         return d
 
 
@@ -484,6 +486,8 @@ def __init__(
         minv: Optional[float] = 0.0,
         maxv: Optional[float] = 1.0,
         factor: Optional[float] = None,
+        channel_wise: bool = False,
+        dtype: DtypeLike = np.float32,
         allow_missing_keys: bool = False,
     ) -> None:
         """
@@ -494,11 +498,14 @@ def __init__(
             maxv: maximum value of output data.
             factor: factor scale by ``v = v * (1 + factor)``. In order to use
                 this parameter, please set `minv` and `maxv` into None.
+            channel_wise: if True, scale on each channel separately. Please ensure
+                that the first dimension represents the channel of the image if True.
+            dtype: output data type, defaults to float32.
             allow_missing_keys: don't raise exception if key is missing.
 
         """
         super().__init__(keys, allow_missing_keys)
-        self.scaler = ScaleIntensity(minv, maxv, factor)
+        self.scaler = ScaleIntensity(minv, maxv, factor, channel_wise, dtype)
 
     def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
@@ -519,6 +526,7 @@ def __init__(
         keys: KeysCollection,
         factors: Union[Tuple[float, float], float],
         prob: float = 0.1,
+        dtype: DtypeLike = np.float32,
         allow_missing_keys: bool = False,
     ) -> None:
         """
@@ -529,6 +537,7 @@ def __init__(
                 if single number, factor value is picked from (-factors, factors).
             prob: probability of rotating.
                 (Default 0.1, with 10% probability it returns a rotated array.)
+            dtype: output data type, defaults to float32.
             allow_missing_keys: don't raise exception if key is missing.
 
         """
@@ -542,6 +551,7 @@ def __init__(
         else:
             self.factors = (min(factors), max(factors))
         self.factor = self.factors[0]
+        self.dtype = dtype
 
     def randomize(self, data: Optional[Any] = None) -> None:
         self.factor = self.R.uniform(low=self.factors[0], high=self.factors[1])
@@ -552,7 +562,7 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N
         self.randomize()
         if not self._do_transform:
             return d
-        scaler = ScaleIntensity(minv=None, maxv=None, factor=self.factor)
+        scaler = ScaleIntensity(minv=None, maxv=None, factor=self.factor, dtype=self.dtype)
         for key in self.key_iterator(d):
             d[key] = scaler(d[key])
         return d
@@ -655,6 +665,8 @@ class ThresholdIntensityd(MapTransform):
         allow_missing_keys: don't raise exception if key is missing.
     """
 
+    backend = ThresholdIntensity.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -666,7 +678,7 @@ def __init__(
         super().__init__(keys, allow_missing_keys)
         self.filter = ThresholdIntensity(threshold, above, cval)
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key in self.key_iterator(d):
             d[key] = self.filter(d[key])
@@ -688,6 +700,8 @@ class ScaleIntensityRanged(MapTransform):
         allow_missing_keys: don't raise exception if key is missing.
     """
 
+    backend = ScaleIntensityRange.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -701,7 +715,7 @@ def __init__(
         super().__init__(keys, allow_missing_keys)
         self.scaler = ScaleIntensityRange(a_min, a_max, b_min, b_max, clip)
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key in self.key_iterator(d):
             d[key] = self.scaler(d[key])
@@ -722,11 +736,13 @@ class AdjustContrastd(MapTransform):
         allow_missing_keys: don't raise exception if key is missing.
     """
 
+    backend = AdjustContrast.backend
+
     def __init__(self, keys: KeysCollection, gamma: float, allow_missing_keys: bool = False) -> None:
         super().__init__(keys, allow_missing_keys)
         self.adjuster = AdjustContrast(gamma)
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key in self.key_iterator(d):
             d[key] = self.adjuster(d[key])
@@ -749,6 +765,8 @@ class RandAdjustContrastd(RandomizableTransform, MapTransform):
         allow_missing_keys: don't raise exception if key is missing.
     """
 
+    backend = AdjustContrast.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -776,7 +794,7 @@ def randomize(self, data: Optional[Any] = None) -> None:
         super().randomize(None)
         self.gamma_value = self.R.uniform(low=self.gamma[0], high=self.gamma[1])
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         self.randomize()
         if self.gamma_value is None:
@@ -805,6 +823,8 @@ class ScaleIntensityRangePercentilesd(MapTransform):
         allow_missing_keys: don't raise exception if key is missing.
     """
 
+    backend = ScaleIntensityRangePercentiles.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -819,7 +839,7 @@ def __init__(
         super().__init__(keys, allow_missing_keys)
         self.scaler = ScaleIntensityRangePercentiles(lower, upper, b_min, b_max, clip, relative)
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key in self.key_iterator(d):
             d[key] = self.scaler(d[key])
@@ -882,6 +902,8 @@ class GaussianSmoothd(MapTransform):
 
     """
 
+    backend = GaussianSmooth.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -892,7 +914,7 @@ def __init__(
         super().__init__(keys, allow_missing_keys)
         self.converter = GaussianSmooth(sigma, approx=approx)
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key in self.key_iterator(d):
             d[key] = self.converter(d[key])
@@ -916,6 +938,8 @@ class RandGaussianSmoothd(RandomizableTransform, MapTransform):
 
     """
 
+    backend = GaussianSmooth.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -939,14 +963,15 @@ def randomize(self, data: Optional[Any] = None) -> None:
         self.y = self.R.uniform(low=self.sigma_y[0], high=self.sigma_y[1])
         self.z = self.R.uniform(low=self.sigma_z[0], high=self.sigma_z[1])
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         self.randomize()
-        if not self._do_transform:
-            return d
         for key in self.key_iterator(d):
-            sigma = ensure_tuple_size(tup=(self.x, self.y, self.z), dim=d[key].ndim - 1)
-            d[key] = GaussianSmooth(sigma=sigma, approx=self.approx)(d[key])
+            if self._do_transform:
+                sigma = ensure_tuple_size(tup=(self.x, self.y, self.z), dim=d[key].ndim - 1)
+                d[key] = GaussianSmooth(sigma=sigma, approx=self.approx)(d[key])
+            else:
+                d[key], *_ = convert_data_type(d[key], torch.Tensor, dtype=torch.float)
         return d
 
 
@@ -970,6 +995,8 @@ class GaussianSharpend(MapTransform):
 
     """
 
+    backend = GaussianSharpen.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -982,7 +1009,7 @@ def __init__(
         super().__init__(keys, allow_missing_keys)
         self.converter = GaussianSharpen(sigma1, sigma2, alpha, approx=approx)
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key in self.key_iterator(d):
             d[key] = self.converter(d[key])
@@ -1013,6 +1040,8 @@ class RandGaussianSharpend(RandomizableTransform, MapTransform):
 
     """
 
+    backend = GaussianSharpen.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -1051,15 +1080,17 @@ def randomize(self, data: Optional[Any] = None) -> None:
         self.z2 = self.R.uniform(low=sigma2_z[0], high=sigma2_z[1])
         self.a = self.R.uniform(low=self.alpha[0], high=self.alpha[1])
 
-    def __call__(self, data):
+    def __call__(self, data: Dict[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         self.randomize()
-        if not self._do_transform:
-            return d
         for key in self.key_iterator(d):
-            sigma1 = ensure_tuple_size(tup=(self.x1, self.y1, self.z1), dim=d[key].ndim - 1)
-            sigma2 = ensure_tuple_size(tup=(self.x2, self.y2, self.z2), dim=d[key].ndim - 1)
-            d[key] = GaussianSharpen(sigma1=sigma1, sigma2=sigma2, alpha=self.a, approx=self.approx)(d[key])
+            if self._do_transform:
+                sigma1 = ensure_tuple_size(tup=(self.x1, self.y1, self.z1), dim=d[key].ndim - 1)
+                sigma2 = ensure_tuple_size(tup=(self.x2, self.y2, self.z2), dim=d[key].ndim - 1)
+                d[key] = GaussianSharpen(sigma1=sigma1, sigma2=sigma2, alpha=self.a, approx=self.approx)(d[key])
+            else:
+                # if not doing the transform, convert to torch
+                d[key], *_ = convert_data_type(d[key], torch.Tensor, dtype=torch.float32)
         return d
 
 
@@ -1078,6 +1109,8 @@ class RandHistogramShiftd(RandomizableTransform, MapTransform):
         allow_missing_keys: don't raise exception if key is missing.
     """
 
+    backend = [TransformBackends.NUMPY]
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -1108,17 +1141,19 @@ def randomize(self, data: Optional[Any] = None) -> None:
                 self.floating_control_points[i - 1], self.floating_control_points[i + 1]
             )
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         self.randomize()
-        if not self._do_transform:
-            return d
         for key in self.key_iterator(d):
-            img_min, img_max = d[key].min(), d[key].max()
-            reference_control_points_scaled = self.reference_control_points * (img_max - img_min) + img_min
-            floating_control_points_scaled = self.floating_control_points * (img_max - img_min) + img_min
-            dtype = d[key].dtype
-            d[key] = np.interp(d[key], reference_control_points_scaled, floating_control_points_scaled).astype(dtype)
+            d[key] = convert_data_type(d[key], np.ndarray)[0]
+            if self._do_transform:
+                img_min, img_max = d[key].min(), d[key].max()
+                reference_control_points_scaled = self.reference_control_points * (img_max - img_min) + img_min
+                floating_control_points_scaled = self.floating_control_points * (img_max - img_min) + img_min
+                dtype = d[key].dtype
+                d[key] = np.interp(d[key], reference_control_points_scaled, floating_control_points_scaled).astype(
+                    dtype
+                )
         return d
 
 
@@ -1144,28 +1179,27 @@ class RandGibbsNoised(RandomizableTransform, MapTransform):
             values in the interval [0,1] with alpha = 0 acting as the identity mapping.
             If a length-2 list is given as [a,b] then the value of alpha will be sampled
             uniformly from the interval [a,b].
-        as_tensor_output: if true return torch.Tensor, else return np.array. default: True.
         allow_missing_keys: do not raise exception if key is missing.
     """
 
+    backend = GibbsNoise.backend
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         keys: KeysCollection,
         prob: float = 0.1,
         alpha: Sequence[float] = (0.0, 1.0),
-        as_tensor_output: bool = True,
         allow_missing_keys: bool = False,
+        as_tensor_output: bool = True,
     ) -> None:
 
         MapTransform.__init__(self, keys, allow_missing_keys)
         RandomizableTransform.__init__(self, prob=prob)
         self.alpha = alpha
         self.sampled_alpha = -1.0  # stores last alpha sampled by randomize()
-        self.as_tensor_output = as_tensor_output
 
-    def __call__(
-        self, data: Mapping[Hashable, Union[torch.Tensor, np.ndarray]]
-    ) -> Dict[Hashable, Union[torch.Tensor, np.ndarray]]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
 
         d = dict(data)
         self._randomize(None)
@@ -1173,13 +1207,8 @@ def __call__(
         for i, key in enumerate(self.key_iterator(d)):
             if self._do_transform:
                 if i == 0:
-                    transform = GibbsNoise(self.sampled_alpha, self.as_tensor_output)
+                    transform = GibbsNoise(self.sampled_alpha)
                 d[key] = transform(d[key])
-            else:
-                if isinstance(d[key], np.ndarray) and self.as_tensor_output:
-                    d[key] = torch.Tensor(d[key])
-                elif isinstance(d[key], torch.Tensor) and not self.as_tensor_output:
-                    d[key] = self._to_numpy(d[key])
         return d
 
     def _randomize(self, _: Any) -> None:
@@ -1190,11 +1219,6 @@ def _randomize(self, _: Any) -> None:
         super().randomize(None)
         self.sampled_alpha = self.R.uniform(self.alpha[0], self.alpha[1])
 
-    def _to_numpy(self, d: Union[torch.Tensor, np.ndarray]) -> np.ndarray:
-        if isinstance(d, torch.Tensor):
-            d_numpy: np.ndarray = d.cpu().detach().numpy()
-        return d_numpy
-
 
 class GibbsNoised(MapTransform):
     """
@@ -1212,20 +1236,20 @@ class GibbsNoised(MapTransform):
                 you need to transform.
         alpha (float): Parametrizes the intensity of the Gibbs noise filter applied. Takes
             values in the interval [0,1] with alpha = 0 acting as the identity mapping.
-        as_tensor_output: if true return torch.Tensor, else return np.array. default: True.
         allow_missing_keys: do not raise exception if key is missing.
     """
 
+    backend = GibbsNoise.backend
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
-        self, keys: KeysCollection, alpha: float = 0.5, as_tensor_output: bool = True, allow_missing_keys: bool = False
+        self, keys: KeysCollection, alpha: float = 0.5, allow_missing_keys: bool = False, as_tensor_output: bool = True
     ) -> None:
 
         MapTransform.__init__(self, keys, allow_missing_keys)
-        self.transform = GibbsNoise(alpha, as_tensor_output)
+        self.transform = GibbsNoise(alpha)
 
-    def __call__(
-        self, data: Mapping[Hashable, Union[torch.Tensor, np.ndarray]]
-    ) -> Dict[Hashable, Union[torch.Tensor, np.ndarray]]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
 
         d = dict(data)
         for key in self.key_iterator(d):
@@ -1264,8 +1288,6 @@ class KSpaceSpikeNoised(MapTransform):
             receive a sequence of intensities. This value should be tested as it is
             data-dependent. The default values are the 2.5 the mean of the
             log-intensity for each channel.
-        as_tensor_output: if ``True`` return torch.Tensor, else return np.array.
-            Default: ``True``.
         allow_missing_keys: do not raise exception if key is missing.
 
     Example:
@@ -1276,21 +1298,22 @@ class KSpaceSpikeNoised(MapTransform):
         with `log-intensity = 14`.
     """
 
+    backend = KSpaceSpikeNoise.backend
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         keys: KeysCollection,
         loc: Union[Tuple, Sequence[Tuple]],
         k_intensity: Optional[Union[Sequence[float], float]] = None,
-        as_tensor_output: bool = True,
         allow_missing_keys: bool = False,
+        as_tensor_output: bool = True,
     ) -> None:
 
         super().__init__(keys, allow_missing_keys)
-        self.transform = KSpaceSpikeNoise(loc, k_intensity, as_tensor_output)
+        self.transform = KSpaceSpikeNoise(loc, k_intensity)
 
-    def __call__(
-        self, data: Mapping[Hashable, Union[torch.Tensor, np.ndarray]]
-    ) -> Dict[Hashable, Union[torch.Tensor, np.ndarray]]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         """
         Args:
             data: Expects image/label to have dimensions (C, H, W) or
@@ -1337,8 +1360,6 @@ class RandKSpaceSpikeNoised(RandomizableTransform, MapTransform):
         common_sampling: If ``True`` same values for location and log-intensity
              will be sampled for the image and label.
         common_seed: Seed to be used in case ``common_sampling = True``.
-        as_tensor_output: if ``True`` return torch.Tensor, else return
-            np.array. Default: ``True``.
         allow_missing_keys: do not raise exception if key is missing.
 
     Example:
@@ -1348,6 +1369,9 @@ class RandKSpaceSpikeNoised(RandomizableTransform, MapTransform):
         ``RandKSpaceSpikeNoised("image", prob=0.5, intensity_ranges={"image":(13,15)}, channel_wise=True)``.
     """
 
+    backend = KSpaceSpikeNoise.backend
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         keys: KeysCollection,
@@ -1357,8 +1381,8 @@ def __init__(
         channel_wise: bool = True,
         common_sampling: bool = False,
         common_seed: int = 42,
-        as_tensor_output: bool = True,
         allow_missing_keys: bool = False,
+        as_tensor_output: bool = True,
     ):
 
         MapTransform.__init__(self, keys, allow_missing_keys)
@@ -1366,21 +1390,16 @@ def __init__(
 
         self.common_sampling = common_sampling
         self.common_seed = common_seed
-        self.as_tensor_output = as_tensor_output
         # the spikes artifact is amplitude dependent so we instantiate one per key
         self.transforms = {}
         if isinstance(intensity_ranges, Mapping):
             for k in self.keys:
-                self.transforms[k] = RandKSpaceSpikeNoise(
-                    prob, intensity_ranges[k], channel_wise, self.as_tensor_output
-                )
+                self.transforms[k] = RandKSpaceSpikeNoise(prob, intensity_ranges[k], channel_wise)
         else:
             for k in self.keys:
-                self.transforms[k] = RandKSpaceSpikeNoise(prob, None, channel_wise, self.as_tensor_output)
+                self.transforms[k] = RandKSpaceSpikeNoise(prob, None, channel_wise)
 
-    def __call__(
-        self, data: Mapping[Hashable, Union[torch.Tensor, np.ndarray]]
-    ) -> Dict[Hashable, Union[torch.Tensor, np.ndarray]]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         """
         Args:
             data: Expects image/label to have dimensions (C, H, W) or
@@ -1397,11 +1416,6 @@ def __call__(
         for key, t in self.key_iterator(d, self.transforms):
             if self._do_transform:
                 d[key] = self.transforms[t](d[key])
-            else:
-                if isinstance(d[key], np.ndarray) and self.as_tensor_output:
-                    d[key] = torch.Tensor(d[key])
-                elif isinstance(d[key], torch.Tensor) and not self.as_tensor_output:
-                    d[key] = self._to_numpy(d[key])
         return d
 
     def set_rand_state(self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None) -> None:
@@ -1478,6 +1492,13 @@ def __init__(
             prob=prob,
         )
 
+    def set_random_state(
+        self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None
+    ) -> "RandCoarseDropoutd":
+        self.dropper.set_random_state(seed, state)
+        super().set_random_state(seed, state)
+        return self
+
     def randomize(self, img_size: Sequence[int]) -> None:
         self.dropper.randomize(img_size=img_size)
 
@@ -1492,6 +1513,72 @@ def __call__(self, data):
         return d
 
 
+class RandCoarseShuffled(Randomizable, MapTransform):
+    """
+    Dictionary-based wrapper of :py:class:`monai.transforms.RandCoarseShuffle`.
+    Expect all the data specified by `keys` have same spatial shape and will randomly dropout the same regions
+    for every key, if want to shuffle different regions for every key, please use this transform separately.
+
+    Args:
+        keys: keys of the corresponding items to be transformed.
+            See also: :py:class:`monai.transforms.compose.MapTransform`
+        holes: number of regions to dropout, if `max_holes` is not None, use this arg as the minimum number to
+            randomly select the expected number of regions.
+        spatial_size: spatial size of the regions to dropout, if `max_spatial_size` is not None, use this arg
+            as the minimum spatial size to randomly select size for every region.
+            if some components of the `spatial_size` are non-positive values, the transform will use the
+            corresponding components of input img size. For example, `spatial_size=(32, -1)` will be adapted
+            to `(32, 64)` if the second spatial dimension size of img is `64`.
+        max_holes: if not None, define the maximum number to randomly select the expected number of regions.
+        max_spatial_size: if not None, define the maximum spatial size to randomly select size for every region.
+            if some components of the `max_spatial_size` are non-positive values, the transform will use the
+            corresponding components of input img size. For example, `max_spatial_size=(32, -1)` will be adapted
+            to `(32, 64)` if the second spatial dimension size of img is `64`.
+        prob: probability of applying the transform.
+        allow_missing_keys: don't raise exception if key is missing.
+
+    """
+
+    def __init__(
+        self,
+        keys: KeysCollection,
+        holes: int,
+        spatial_size: Union[Sequence[int], int],
+        max_holes: Optional[int] = None,
+        max_spatial_size: Optional[Union[Sequence[int], int]] = None,
+        prob: float = 0.1,
+        allow_missing_keys: bool = False,
+    ):
+        MapTransform.__init__(self, keys, allow_missing_keys)
+        self.shuffle = RandCoarseShuffle(
+            holes=holes,
+            spatial_size=spatial_size,
+            max_holes=max_holes,
+            max_spatial_size=max_spatial_size,
+            prob=prob,
+        )
+
+    def set_random_state(
+        self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None
+    ) -> "RandCoarseShuffled":
+        self.shuffle.set_random_state(seed, state)
+        super().set_random_state(seed, state)
+        return self
+
+    def randomize(self, img_size: Sequence[int]) -> None:
+        self.shuffle.randomize(img_size=img_size)
+
+    def __call__(self, data):
+        d = dict(data)
+        # expect all the specified keys have same spatial shape
+        self.randomize(d[self.keys[0]].shape[1:])
+        if self.shuffle._do_transform:
+            for key in self.key_iterator(d):
+                d[key] = self.shuffle(img=d[key])
+
+        return d
+
+
 class HistogramNormalized(MapTransform):
     """
     Dictionary-based wrapper of :py:class:`monai.transforms.HistogramNormalize`.
@@ -1512,13 +1599,15 @@ class HistogramNormalized(MapTransform):
 
     """
 
+    backend = HistogramNormalize.backend
+
     def __init__(
         self,
         keys: KeysCollection,
         num_bins: int = 256,
         min: int = 0,
         max: int = 255,
-        mask: Optional[np.ndarray] = None,
+        mask: Optional[NdarrayOrTensor] = None,
         mask_key: Optional[str] = None,
         dtype: DtypeLike = np.float32,
         allow_missing_keys: bool = False,
@@ -1527,7 +1616,7 @@ def __init__(
         self.transform = HistogramNormalize(num_bins=num_bins, min=min, max=max, mask=mask, dtype=dtype)
         self.mask_key = mask_key if mask is None else None
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key in self.key_iterator(d):
             d[key] = self.transform(d[key], d[self.mask_key]) if self.mask_key is not None else self.transform(d[key])
@@ -1562,3 +1651,4 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda
 RandKSpaceSpikeNoiseD = RandKSpaceSpikeNoiseDict = RandKSpaceSpikeNoised
 RandCoarseDropoutD = RandCoarseDropoutDict = RandCoarseDropoutd
 HistogramNormalizeD = HistogramNormalizeDict = HistogramNormalized
+RandCoarseShuffleD = RandCoarseShuffleDict = RandCoarseShuffled
diff --git a/monai/transforms/inverse_batch_transform.py b/monai/transforms/inverse_batch_transform.py
index d9c6790840..b485e5bac4 100644
--- a/monai/transforms/inverse_batch_transform.py
+++ b/monai/transforms/inverse_batch_transform.py
@@ -99,7 +99,7 @@ def __call__(self, data: Dict[str, Any]) -> Any:
             re_str = str(re)
             if "equal size" in re_str:
                 re_str += "\nMONAI hint: try creating `BatchInverseTransform` with `collate_fn=lambda x: x`."
-            raise RuntimeError(re_str)
+            raise RuntimeError(re_str) from re
 
 
 class Decollated(MapTransform):
diff --git a/monai/transforms/post/array.py b/monai/transforms/post/array.py
index 631947025c..eb1860eed1 100644
--- a/monai/transforms/post/array.py
+++ b/monai/transforms/post/array.py
@@ -25,7 +25,7 @@
 from monai.networks.layers import GaussianFilter
 from monai.transforms.transform import Transform
 from monai.transforms.utils import fill_holes, get_largest_connected_component_mask
-from monai.utils import deprecated_arg, ensure_tuple, look_up_option
+from monai.utils import TransformBackends, deprecated_arg, ensure_tuple, look_up_option
 
 __all__ = [
     "Activations",
@@ -57,6 +57,8 @@ class Activations(Transform):
 
     """
 
+    backend = [TransformBackends.TORCH]
+
     def __init__(self, sigmoid: bool = False, softmax: bool = False, other: Optional[Callable] = None) -> None:
         self.sigmoid = sigmoid
         self.softmax = softmax
@@ -129,8 +131,13 @@ class AsDiscrete(Transform):
         rounding: if not None, round the data according to the specified option,
             available options: ["torchrounding"].
 
+    .. deprecated:: 0.6.0
+        ``n_classes`` is deprecated, use ``num_classes`` instead.
+
     """
 
+    backend = [TransformBackends.TORCH]
+
     @deprecated_arg("n_classes", since="0.6")
     def __init__(
         self,
@@ -181,6 +188,9 @@ def __call__(
             rounding: if not None, round the data according to the specified option,
                 available options: ["torchrounding"].
 
+        .. deprecated:: 0.6.0
+            ``n_classes`` is deprecated, use ``num_classes`` instead.
+
         """
         # in case the new num_classes is default but you still call deprecated n_classes
         if n_classes is not None and num_classes is None:
@@ -199,7 +209,7 @@ def __call__(
 
         rounding = self.rounding if rounding is None else rounding
         if rounding is not None:
-            rounding = look_up_option(rounding, ["torchrounding"])
+            look_up_option(rounding, ["torchrounding"])
             img = torch.round(img)
 
         return img.float()
@@ -649,9 +659,8 @@ def __call__(
                 prob_map = torch.as_tensor(prob_map, dtype=torch.float)
             self.filter.to(prob_map)
             prob_map = self.filter(prob_map)
-        else:
-            if not isinstance(prob_map, torch.Tensor):
-                prob_map = prob_map.copy()
+        elif not isinstance(prob_map, torch.Tensor):
+            prob_map = prob_map.copy()
 
         if isinstance(prob_map, torch.Tensor):
             prob_map = prob_map.detach().cpu().numpy()
diff --git a/monai/transforms/post/dictionary.py b/monai/transforms/post/dictionary.py
index 2fc3993e3e..4ca07da949 100644
--- a/monai/transforms/post/dictionary.py
+++ b/monai/transforms/post/dictionary.py
@@ -86,6 +86,8 @@ class Activationsd(MapTransform):
     Add activation layers to the input data specified by `keys`.
     """
 
+    backend = Activations.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -126,6 +128,8 @@ class AsDiscreted(MapTransform):
     Dictionary-based wrapper of :py:class:`monai.transforms.AsDiscrete`.
     """
 
+    backend = AsDiscrete.backend
+
     @deprecated_arg("n_classes", since="0.6")
     def __init__(
         self,
@@ -158,6 +162,9 @@ def __init__(
                 each element corresponds to a key in ``keys``.
             allow_missing_keys: don't raise exception if key is missing.
 
+        .. deprecated:: 0.6.0
+            ``n_classes`` is deprecated, use ``num_classes`` instead.
+
         """
         # in case the new num_classes is default but you still call deprecated n_classes
         if n_classes is not None and num_classes is None:
diff --git a/monai/transforms/spatial/array.py b/monai/transforms/spatial/array.py
index c3bd4a3433..9ccd315354 100644
--- a/monai/transforms/spatial/array.py
+++ b/monai/transforms/spatial/array.py
@@ -22,7 +22,7 @@
 from monai.config.type_definitions import NdarrayOrTensor
 from monai.data.utils import compute_shape_offset, to_affine_nd, zoom_affine
 from monai.networks.layers import AffineTransform, GaussianFilter, grid_pull
-from monai.transforms.croppad.array import CenterSpatialCrop
+from monai.transforms.croppad.array import CenterSpatialCrop, Pad
 from monai.transforms.transform import Randomizable, RandomizableTransform, ThreadUnsafe, Transform
 from monai.transforms.utils import (
     create_control_grid,
@@ -38,6 +38,7 @@
     GridSamplePadMode,
     InterpolateMode,
     NumpyPadMode,
+    PytorchPadMode,
     ensure_tuple,
     ensure_tuple_rep,
     ensure_tuple_size,
@@ -45,8 +46,10 @@
     issequenceiterable,
     optional_import,
 )
+from monai.utils.deprecated import deprecated_arg
 from monai.utils.enums import TransformBackends
 from monai.utils.module import look_up_option
+from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
 nib, _ = optional_import("nibabel")
 
@@ -82,6 +85,8 @@ class Spacing(Transform):
     Resample input image into the specified `pixdim`.
     """
 
+    backend = [TransformBackends.TORCH]
+
     def __init__(
         self,
         pixdim: Union[Sequence[float], float],
@@ -90,6 +95,7 @@ def __init__(
         padding_mode: Union[GridSamplePadMode, str] = GridSamplePadMode.BORDER,
         align_corners: bool = False,
         dtype: DtypeLike = np.float64,
+        image_only: bool = False,
     ) -> None:
         """
         Args:
@@ -122,6 +128,7 @@ def __init__(
             dtype: data type for resampling computation. Defaults to ``np.float64`` for best precision.
                 If None, use the data type of input data. To be compatible with other modules,
                 the output data type is always ``np.float32``.
+            image_only: return just the image or the image, the old affine and new affine. Default is `False`.
 
         """
         self.pixdim = np.array(ensure_tuple(pixdim), dtype=np.float64)
@@ -130,17 +137,18 @@ def __init__(
         self.padding_mode: GridSamplePadMode = look_up_option(padding_mode, GridSamplePadMode)
         self.align_corners = align_corners
         self.dtype = dtype
+        self.image_only = image_only
 
     def __call__(
         self,
-        data_array: np.ndarray,
-        affine: Optional[np.ndarray] = None,
+        data_array: NdarrayOrTensor,
+        affine: Optional[NdarrayOrTensor] = None,
         mode: Optional[Union[GridSampleMode, str]] = None,
         padding_mode: Optional[Union[GridSamplePadMode, str]] = None,
         align_corners: Optional[bool] = None,
         dtype: DtypeLike = None,
         output_spatial_shape: Optional[np.ndarray] = None,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    ) -> Union[NdarrayOrTensor, Tuple[NdarrayOrTensor, NdarrayOrTensor, NdarrayOrTensor]]:
         """
         Args:
             data_array: in shape (num_channels, H[, W, ...]).
@@ -169,7 +177,7 @@ def __call__(
 
         """
         _dtype = dtype or self.dtype or data_array.dtype
-        sr = data_array.ndim - 1
+        sr = int(data_array.ndim - 1)
         if sr <= 0:
             raise ValueError("data_array must have at least one spatial dimension.")
         if affine is None:
@@ -177,7 +185,8 @@ def __call__(
             affine = np.eye(sr + 1, dtype=np.float64)
             affine_ = np.eye(sr + 1, dtype=np.float64)
         else:
-            affine_ = to_affine_nd(sr, affine)
+            affine, *_ = convert_data_type(affine, np.ndarray)
+            affine_ = to_affine_nd(sr, affine)  # type: ignore
 
         out_d = self.pixdim[:sr]
         if out_d.size < sr:
@@ -193,27 +202,30 @@ def __call__(
 
         # no resampling if it's identity transform
         if np.allclose(transform, np.diag(np.ones(len(transform))), atol=1e-3):
-            output_data = data_array.copy().astype(np.float32)
-            new_affine = to_affine_nd(affine, new_affine)
-            return output_data, affine, new_affine
-
-        # resample
-        affine_xform = AffineTransform(
-            normalized=False,
-            mode=look_up_option(mode or self.mode, GridSampleMode),
-            padding_mode=look_up_option(padding_mode or self.padding_mode, GridSamplePadMode),
-            align_corners=self.align_corners if align_corners is None else align_corners,
-            reverse_indexing=True,
-        )
-        output_data = affine_xform(
-            # AffineTransform requires a batch dim
-            torch.as_tensor(np.ascontiguousarray(data_array).astype(_dtype)).unsqueeze(0),
-            torch.as_tensor(np.ascontiguousarray(transform).astype(_dtype)),
-            spatial_size=output_shape if output_spatial_shape is None else output_spatial_shape,
-        )
-        output_data = np.asarray(output_data.squeeze(0).detach().cpu().numpy(), dtype=np.float32)  # type: ignore
-        new_affine = to_affine_nd(affine, new_affine)
-
+            output_data, *_ = convert_data_type(data_array, dtype=torch.float32)
+            new_affine = to_affine_nd(affine, new_affine)  # type: ignore
+        else:
+            # resample
+            affine_xform = AffineTransform(
+                normalized=False,
+                mode=look_up_option(mode or self.mode, GridSampleMode),
+                padding_mode=look_up_option(padding_mode or self.padding_mode, GridSamplePadMode),
+                align_corners=self.align_corners if align_corners is None else align_corners,
+                reverse_indexing=True,
+            )
+            data_array_t: torch.Tensor
+            data_array_t, *_ = convert_data_type(data_array, torch.Tensor, dtype=_dtype)  # type: ignore
+            output_data = affine_xform(
+                # AffineTransform requires a batch dim
+                data_array_t.unsqueeze(0),
+                convert_data_type(transform, torch.Tensor, data_array_t.device, dtype=_dtype)[0],
+                spatial_size=output_shape if output_spatial_shape is None else output_spatial_shape,
+            ).squeeze(0)
+            output_data, *_ = convert_to_dst_type(output_data, data_array, dtype=torch.float32)
+            new_affine = to_affine_nd(affine, new_affine)  # type: ignore
+
+        if self.image_only:
+            return output_data
         return output_data, affine, new_affine
 
 
@@ -227,6 +239,7 @@ def __init__(
         axcodes: Optional[str] = None,
         as_closest_canonical: bool = False,
         labels: Optional[Sequence[Tuple[str, str]]] = tuple(zip("LPI", "RAS")),
+        image_only: bool = False,
     ) -> None:
         """
         Args:
@@ -239,6 +252,7 @@ def __init__(
             labels: optional, None or sequence of (2,) sequences
                 (2,) sequences are labels for (beginning, end) of output axis.
                 Defaults to ``(('L', 'R'), ('P', 'A'), ('I', 'S'))``.
+            image_only: if True return only the image volume, otherwise return (image, affine, new_affine).
 
         Raises:
             ValueError: When ``axcodes=None`` and ``as_closest_canonical=True``. Incompatible values.
@@ -253,10 +267,11 @@ def __init__(
         self.axcodes = axcodes
         self.as_closest_canonical = as_closest_canonical
         self.labels = labels
+        self.image_only = image_only
 
     def __call__(
         self, data_array: np.ndarray, affine: Optional[np.ndarray] = None
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
         """
         original orientation of `data_array` is defined by `affine`.
 
@@ -269,9 +284,11 @@ def __call__(
             ValueError: When ``axcodes`` spatiality differs from ``data_array``.
 
         Returns:
-            data_array (reoriented in `self.axcodes`), original axcodes, current axcodes.
+            data_array [reoriented in `self.axcodes`] if `self.image_only`, else
+            (data_array [reoriented in `self.axcodes`], original axcodes, current axcodes).
 
         """
+        data_array, *_ = convert_data_type(data_array, np.ndarray)  # type: ignore
         sr = data_array.ndim - 1
         if sr <= 0:
             raise ValueError("data_array must have at least one spatial dimension.")
@@ -300,6 +317,8 @@ def __call__(
         new_affine = affine_ @ nib.orientations.inv_ornt_aff(spatial_ornt, shape)
         new_affine = to_affine_nd(affine, new_affine)
 
+        if self.image_only:
+            return data_array
         return data_array, affine, new_affine
 
 
@@ -330,8 +349,7 @@ def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         if isinstance(img, np.ndarray):
             return np.ascontiguousarray(np.flip(img, map_spatial_axes(img.ndim, self.spatial_axis)))
-        else:
-            return torch.flip(img, map_spatial_axes(img.ndim, self.spatial_axis))
+        return torch.flip(img, map_spatial_axes(img.ndim, self.spatial_axis))
 
 
 class Resize(Transform):
@@ -357,6 +375,8 @@ class Resize(Transform):
             See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate
     """
 
+    backend = [TransformBackends.TORCH]
+
     def __init__(
         self,
         spatial_size: Union[Sequence[int], int],
@@ -371,10 +391,10 @@ def __init__(
 
     def __call__(
         self,
-        img: np.ndarray,
+        img: NdarrayOrTensor,
         mode: Optional[Union[InterpolateMode, str]] = None,
         align_corners: Optional[bool] = None,
-    ) -> np.ndarray:
+    ) -> NdarrayOrTensor:
         """
         Args:
             img: channel first array, must have shape: (num_channels, H[, W, ..., ]).
@@ -389,32 +409,33 @@ def __call__(
             ValueError: When ``self.spatial_size`` length is less than ``img`` spatial dimensions.
 
         """
+        img_, *_ = convert_data_type(img, torch.Tensor, dtype=torch.float)  # type: ignore
         if self.size_mode == "all":
-            input_ndim = img.ndim - 1  # spatial ndim
+            input_ndim = img_.ndim - 1  # spatial ndim
             output_ndim = len(ensure_tuple(self.spatial_size))
             if output_ndim > input_ndim:
-                input_shape = ensure_tuple_size(img.shape, output_ndim + 1, 1)
-                img = img.reshape(input_shape)
+                input_shape = ensure_tuple_size(img_.shape, output_ndim + 1, 1)
+                img_ = img_.reshape(input_shape)
             elif output_ndim < input_ndim:
                 raise ValueError(
                     "len(spatial_size) must be greater or equal to img spatial dimensions, "
                     f"got spatial_size={output_ndim} img={input_ndim}."
                 )
-            spatial_size_ = fall_back_tuple(self.spatial_size, img.shape[1:])
+            spatial_size_ = fall_back_tuple(self.spatial_size, img_.shape[1:])
         else:  # for the "longest" mode
-            img_size = img.shape[1:]
+            img_size = img_.shape[1:]
             if not isinstance(self.spatial_size, int):
                 raise ValueError("spatial_size must be an int number if size_mode is 'longest'.")
             scale = self.spatial_size / max(img_size)
             spatial_size_ = tuple(int(round(s * scale)) for s in img_size)
         resized = torch.nn.functional.interpolate(  # type: ignore
-            input=torch.as_tensor(np.ascontiguousarray(img), dtype=torch.float).unsqueeze(0),
+            input=img_.unsqueeze(0),  # type: ignore
             size=spatial_size_,
             mode=look_up_option(self.mode if mode is None else mode, InterpolateMode).value,
             align_corners=self.align_corners if align_corners is None else align_corners,
         )
-        resized = resized.squeeze(0).detach().cpu().numpy()
-        return np.asarray(resized)
+        out, *_ = convert_to_dst_type(resized.squeeze(0), img)
+        return out
 
 
 class Rotate(Transform, ThreadUnsafe):
@@ -439,6 +460,8 @@ class Rotate(Transform, ThreadUnsafe):
             the output data type is always ``np.float32``.
     """
 
+    backend = [TransformBackends.TORCH]
+
     def __init__(
         self,
         angle: Union[Sequence[float], float],
@@ -446,7 +469,7 @@ def __init__(
         mode: Union[GridSampleMode, str] = GridSampleMode.BILINEAR,
         padding_mode: Union[GridSamplePadMode, str] = GridSamplePadMode.BORDER,
         align_corners: bool = False,
-        dtype: DtypeLike = np.float64,
+        dtype: Union[DtypeLike, torch.dtype] = np.float64,
     ) -> None:
         self.angle = angle
         self.keep_size = keep_size
@@ -454,16 +477,16 @@ def __init__(
         self.padding_mode: GridSamplePadMode = look_up_option(padding_mode, GridSamplePadMode)
         self.align_corners = align_corners
         self.dtype = dtype
-        self._rotation_matrix: Optional[np.ndarray] = None
+        self._rotation_matrix: Optional[NdarrayOrTensor] = None
 
     def __call__(
         self,
-        img: np.ndarray,
+        img: NdarrayOrTensor,
         mode: Optional[Union[GridSampleMode, str]] = None,
         padding_mode: Optional[Union[GridSamplePadMode, str]] = None,
         align_corners: Optional[bool] = None,
-        dtype: DtypeLike = None,
-    ) -> np.ndarray:
+        dtype: Union[DtypeLike, torch.dtype] = None,
+    ) -> NdarrayOrTensor:
         """
         Args:
             img: channel first array, must have shape: [chns, H, W] or [chns, H, W, D].
@@ -486,7 +509,11 @@ def __call__(
 
         """
         _dtype = dtype or self.dtype or img.dtype
-        im_shape = np.asarray(img.shape[1:])  # spatial dimensions
+
+        img_t: torch.Tensor
+        img_t, *_ = convert_data_type(img, torch.Tensor, dtype=_dtype)  # type: ignore
+
+        im_shape = np.asarray(img_t.shape[1:])  # spatial dimensions
         input_ndim = len(im_shape)
         if input_ndim not in (2, 3):
             raise ValueError(f"Unsupported img dimension: {input_ndim}, available options are [2, 3].")
@@ -499,11 +526,14 @@ def __call__(
             corners = np.asarray(np.meshgrid(*[(0, dim) for dim in im_shape], indexing="ij")).reshape(
                 (len(im_shape), -1)
             )
-            corners = transform[:-1, :-1] @ corners
+            corners = transform[:-1, :-1] @ corners  # type: ignore
             output_shape = np.asarray(corners.ptp(axis=1) + 0.5, dtype=int)
         shift_1 = create_translate(input_ndim, (-(output_shape - 1) / 2).tolist())
         transform = shift @ transform @ shift_1
 
+        transform_t: torch.Tensor
+        transform_t, *_ = convert_to_dst_type(transform, img_t)  # type: ignore
+
         xform = AffineTransform(
             normalized=False,
             mode=look_up_option(mode or self.mode, GridSampleMode),
@@ -511,15 +541,13 @@ def __call__(
             align_corners=self.align_corners if align_corners is None else align_corners,
             reverse_indexing=True,
         )
-        output = xform(
-            torch.as_tensor(np.ascontiguousarray(img).astype(_dtype)).unsqueeze(0),
-            torch.as_tensor(np.ascontiguousarray(transform).astype(_dtype)),
-            spatial_size=output_shape,
-        )
+        output: torch.Tensor = xform(img_t.unsqueeze(0), transform_t, spatial_size=output_shape).float().squeeze(0)
         self._rotation_matrix = transform
-        return np.asarray(output.squeeze(0).detach().cpu().numpy(), dtype=np.float32)
+        out: NdarrayOrTensor
+        out, *_ = convert_to_dst_type(output, dst=img, dtype=output.dtype)
+        return out
 
-    def get_rotation_matrix(self) -> Optional[np.ndarray]:
+    def get_rotation_matrix(self) -> Optional[NdarrayOrTensor]:
         """
         Get the most recently applied rotation matrix
         This is not thread-safe.
@@ -542,82 +570,96 @@ class Zoom(Transform):
         mode: {``"nearest"``, ``"linear"``, ``"bilinear"``, ``"bicubic"``, ``"trilinear"``, ``"area"``}
             The interpolation mode. Defaults to ``"area"``.
             See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate
-        padding_mode: {``"constant"``, ``"edge``", ``"linear_ramp``", ``"maximum``", ``"mean``", `"median``",
-            ``"minimum``", `"reflect``", ``"symmetric``", ``"wrap``", ``"empty``", ``"<function>``"}
+        padding_mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+            ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+            available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
+            One of the listed string values or a user supplied function. Defaults to ``"constant"``.
             The mode to pad data after zooming.
-            See also: https://numpy.org/doc/stable/reference/generated/numpy.pad.html
+            See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+            https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
         align_corners: This only has an effect when mode is
             'linear', 'bilinear', 'bicubic' or 'trilinear'. Default: None.
             See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate
         keep_size: Should keep original size (padding/slicing if needed), default is True.
-        np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension.
-            more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+        kwargs: other arguments for the `np.pad` or `torch.pad` function.
+            note that `np.pad` treats channel dimension as the first dimension.
 
     """
 
+    backend = [TransformBackends.TORCH]
+
     def __init__(
         self,
         zoom: Union[Sequence[float], float],
         mode: Union[InterpolateMode, str] = InterpolateMode.AREA,
-        padding_mode: Union[NumpyPadMode, str] = NumpyPadMode.EDGE,
+        padding_mode: Union[NumpyPadMode, PytorchPadMode, str] = NumpyPadMode.EDGE,
         align_corners: Optional[bool] = None,
         keep_size: bool = True,
-        **np_kwargs,
+        **kwargs,
     ) -> None:
         self.zoom = zoom
         self.mode: InterpolateMode = InterpolateMode(mode)
-        self.padding_mode: NumpyPadMode = NumpyPadMode(padding_mode)
+        self.padding_mode = padding_mode
         self.align_corners = align_corners
         self.keep_size = keep_size
-        self.np_kwargs = np_kwargs
+        self.kwargs = kwargs
 
     def __call__(
         self,
-        img: np.ndarray,
+        img: NdarrayOrTensor,
         mode: Optional[Union[InterpolateMode, str]] = None,
-        padding_mode: Optional[Union[NumpyPadMode, str]] = None,
+        padding_mode: Optional[Union[NumpyPadMode, PytorchPadMode, str]] = None,
         align_corners: Optional[bool] = None,
-    ):
+    ) -> NdarrayOrTensor:
         """
         Args:
             img: channel first array, must have shape: (num_channels, H[, W, ..., ]).
             mode: {``"nearest"``, ``"linear"``, ``"bilinear"``, ``"bicubic"``, ``"trilinear"``, ``"area"``}
                 The interpolation mode. Defaults to ``self.mode``.
                 See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate
-            padding_mode: {``"constant"``, ``"edge``", ``"linear_ramp``", ``"maximum``", ``"mean``", `"median``",
-                ``"minimum``", `"reflect``", ``"symmetric``", ``"wrap``", ``"empty``", ``"<function>``"}
-                The mode to pad data after zooming, default to ``self.padding_mode``.
-                See also: https://numpy.org/doc/stable/reference/generated/numpy.pad.html
+            padding_mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+                ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+                available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
+                One of the listed string values or a user supplied function. Defaults to ``"constant"``.
+                The mode to pad data after zooming.
+                See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+                https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
             align_corners: This only has an effect when mode is
                 'linear', 'bilinear', 'bicubic' or 'trilinear'. Defaults to ``self.align_corners``.
                 See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate
 
         """
+        img_t: torch.Tensor
+        img_t, *_ = convert_data_type(img, torch.Tensor, dtype=torch.float32)  # type: ignore
+
         _zoom = ensure_tuple_rep(self.zoom, img.ndim - 1)  # match the spatial image dim
-        zoomed = torch.nn.functional.interpolate(  # type: ignore
+        zoomed: NdarrayOrTensor = torch.nn.functional.interpolate(  # type: ignore
             recompute_scale_factor=True,
-            input=torch.as_tensor(np.ascontiguousarray(img), dtype=torch.float).unsqueeze(0),
+            input=img_t.unsqueeze(0),
             scale_factor=list(_zoom),
             mode=look_up_option(self.mode if mode is None else mode, InterpolateMode).value,
             align_corners=self.align_corners if align_corners is None else align_corners,
         )
-        zoomed = zoomed.squeeze(0).detach().cpu().numpy()
-        if not self.keep_size or np.allclose(img.shape, zoomed.shape):
-            return zoomed
+        zoomed = zoomed.squeeze(0)
 
-        pad_vec = [[0, 0]] * len(img.shape)
-        slice_vec = [slice(None)] * len(img.shape)
-        for idx, (od, zd) in enumerate(zip(img.shape, zoomed.shape)):
-            diff = od - zd
-            half = abs(diff) // 2
-            if diff > 0:  # need padding
-                pad_vec[idx] = [half, diff - half]
-            elif diff < 0:  # need slicing
-                slice_vec[idx] = slice(half, half + od)
+        if self.keep_size and not np.allclose(img_t.shape, zoomed.shape):
 
-        padding_mode = look_up_option(self.padding_mode if padding_mode is None else padding_mode, NumpyPadMode)
-        zoomed = np.pad(zoomed, pad_vec, mode=padding_mode.value, **self.np_kwargs)  # type: ignore
-        return zoomed[tuple(slice_vec)]
+            pad_vec = [(0, 0)] * len(img_t.shape)
+            slice_vec = [slice(None)] * len(img_t.shape)
+            for idx, (od, zd) in enumerate(zip(img_t.shape, zoomed.shape)):
+                diff = od - zd
+                half = abs(diff) // 2
+                if diff > 0:  # need padding
+                    pad_vec[idx] = (half, diff - half)
+                elif diff < 0:  # need slicing
+                    slice_vec[idx] = slice(half, half + od)
+
+            padder = Pad(pad_vec, padding_mode or self.padding_mode)
+            zoomed = padder(zoomed)
+            zoomed = zoomed[tuple(slice_vec)]
+
+        out, *_ = convert_to_dst_type(zoomed, dst=img)
+        return out
 
 
 class Rotate90(Transform):
@@ -628,6 +670,8 @@ class Rotate90(Transform):
 
     """
 
+    backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
+
     def __init__(self, k: int = 1, spatial_axes: Tuple[int, int] = (0, 1)) -> None:
         """
         Args:
@@ -642,14 +686,15 @@ def __init__(self, k: int = 1, spatial_axes: Tuple[int, int] = (0, 1)) -> None:
             raise ValueError("spatial_axes must be 2 int numbers to indicate the axes to rotate 90 degrees.")
         self.spatial_axes = spatial_axes_
 
-    def __call__(self, img: np.ndarray) -> np.ndarray:
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         Args:
             img: channel first array, must have shape: (num_channels, H[, W, ..., ]),
         """
-
-        result: np.ndarray = np.rot90(img, self.k, map_spatial_axes(img.ndim, self.spatial_axes))
-        return result.astype(img.dtype)
+        rot90 = torch.rot90 if isinstance(img, torch.Tensor) else np.rot90
+        out: NdarrayOrTensor = rot90(img, self.k, map_spatial_axes(img.ndim, self.spatial_axes))
+        out, *_ = convert_data_type(out, dtype=img.dtype)
+        return out
 
 
 class RandRotate90(RandomizableTransform):
@@ -658,6 +703,8 @@ class RandRotate90(RandomizableTransform):
     in the plane specified by `spatial_axes`.
     """
 
+    backend = Rotate90.backend
+
     def __init__(self, prob: float = 0.1, max_k: int = 3, spatial_axes: Tuple[int, int] = (0, 1)) -> None:
         """
         Args:
@@ -677,7 +724,7 @@ def randomize(self, data: Optional[Any] = None) -> None:
         self._rand_k = self.R.randint(self.max_k) + 1
         super().randomize(None)
 
-    def __call__(self, img: np.ndarray) -> np.ndarray:
+    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
         """
         Args:
             img: channel first array, must have shape: (num_channels, H[, W, ..., ]),
@@ -717,6 +764,8 @@ class RandRotate(RandomizableTransform):
             the output data type is always ``np.float32``.
     """
 
+    backend = Rotate.backend
+
     def __init__(
         self,
         range_x: Union[Tuple[float, float], float] = 0.0,
@@ -727,7 +776,7 @@ def __init__(
         mode: Union[GridSampleMode, str] = GridSampleMode.BILINEAR,
         padding_mode: Union[GridSamplePadMode, str] = GridSamplePadMode.BORDER,
         align_corners: bool = False,
-        dtype: DtypeLike = np.float64,
+        dtype: Union[DtypeLike, torch.dtype] = np.float64,
     ) -> None:
         RandomizableTransform.__init__(self, prob)
         self.range_x = ensure_tuple(range_x)
@@ -758,12 +807,12 @@ def randomize(self, data: Optional[Any] = None) -> None:
 
     def __call__(
         self,
-        img: np.ndarray,
+        img: NdarrayOrTensor,
         mode: Optional[Union[GridSampleMode, str]] = None,
         padding_mode: Optional[Union[GridSamplePadMode, str]] = None,
         align_corners: Optional[bool] = None,
-        dtype: DtypeLike = None,
-    ) -> np.ndarray:
+        dtype: Union[DtypeLike, torch.dtype] = None,
+    ) -> NdarrayOrTensor:
         """
         Args:
             img: channel first array, must have shape 2D: (nchannels, H, W), or 3D: (nchannels, H, W, D).
@@ -781,7 +830,9 @@ def __call__(
         """
         self.randomize()
         if not self._do_transform:
-            return img
+            img_t: torch.Tensor
+            img_t, *_ = convert_data_type(img, torch.Tensor)  # type: ignore
+            return img_t
         rotator = Rotate(
             angle=self.x if img.ndim == 3 else (self.x, self.y, self.z),
             keep_size=self.keep_size,
@@ -790,7 +841,7 @@ def __call__(
             align_corners=self.align_corners if align_corners is None else align_corners,
             dtype=dtype or self.dtype or img.dtype,
         )
-        return np.array(rotator(img))
+        return rotator(img)
 
 
 class RandFlip(RandomizableTransform):
@@ -873,29 +924,34 @@ class RandZoom(RandomizableTransform):
         mode: {``"nearest"``, ``"linear"``, ``"bilinear"``, ``"bicubic"``, ``"trilinear"``, ``"area"``}
             The interpolation mode. Defaults to ``"area"``.
             See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate
-        padding_mode: {``"constant"``, ``"edge``", ``"linear_ramp``", ``"maximum``", ``"mean``", `"median``",
-            ``"minimum``", `"reflect``", ``"symmetric``", ``"wrap``", ``"empty``", ``"<function>``"}
+        padding_mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+            ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+            available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
+            One of the listed string values or a user supplied function. Defaults to ``"constant"``.
             The mode to pad data after zooming.
-            See also: https://numpy.org/doc/stable/reference/generated/numpy.pad.html
+            See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+            https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
         align_corners: This only has an effect when mode is
             'linear', 'bilinear', 'bicubic' or 'trilinear'. Default: None.
             See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate
         keep_size: Should keep original size (pad if needed), default is True.
-        np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension.
-            more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+        kwargs: other arguments for the `np.pad` or `torch.pad` function.
+            note that `np.pad` treats channel dimension as the first dimension.
 
     """
 
+    backend = Zoom.backend
+
     def __init__(
         self,
         prob: float = 0.1,
         min_zoom: Union[Sequence[float], float] = 0.9,
         max_zoom: Union[Sequence[float], float] = 1.1,
         mode: Union[InterpolateMode, str] = InterpolateMode.AREA,
-        padding_mode: Union[NumpyPadMode, str] = NumpyPadMode.EDGE,
+        padding_mode: Union[NumpyPadMode, PytorchPadMode, str] = NumpyPadMode.EDGE,
         align_corners: Optional[bool] = None,
         keep_size: bool = True,
-        **np_kwargs,
+        **kwargs,
     ) -> None:
         RandomizableTransform.__init__(self, prob)
         self.min_zoom = ensure_tuple(min_zoom)
@@ -903,10 +959,10 @@ def __init__(
         if len(self.min_zoom) != len(self.max_zoom):
             raise AssertionError("min_zoom and max_zoom must have same length.")
         self.mode: InterpolateMode = look_up_option(mode, InterpolateMode)
-        self.padding_mode: NumpyPadMode = look_up_option(padding_mode, NumpyPadMode)
+        self.padding_mode = padding_mode
         self.align_corners = align_corners
         self.keep_size = keep_size
-        self.np_kwargs = np_kwargs
+        self.kwargs = kwargs
 
         self._zoom: Sequence[float] = [1.0]
 
@@ -916,46 +972,49 @@ def randomize(self, data: Optional[Any] = None) -> None:
 
     def __call__(
         self,
-        img: np.ndarray,
+        img: NdarrayOrTensor,
         mode: Optional[Union[InterpolateMode, str]] = None,
-        padding_mode: Optional[Union[NumpyPadMode, str]] = None,
+        padding_mode: Optional[Union[NumpyPadMode, PytorchPadMode, str]] = None,
         align_corners: Optional[bool] = None,
-    ) -> np.ndarray:
+    ) -> NdarrayOrTensor:
         """
         Args:
             img: channel first array, must have shape 2D: (nchannels, H, W), or 3D: (nchannels, H, W, D).
             mode: {``"nearest"``, ``"linear"``, ``"bilinear"``, ``"bicubic"``, ``"trilinear"``, ``"area"``}
                 The interpolation mode. Defaults to ``self.mode``.
                 See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate
-            padding_mode: {``"constant"``, ``"edge``", ``"linear_ramp``", ``"maximum``", ``"mean``", `"median``",
-                ``"minimum``", `"reflect``", ``"symmetric``", ``"wrap``", ``"empty``", ``"<function>``"}
-                The mode to pad data after zooming, default to ``self.padding_mode``.
-                See also: https://numpy.org/doc/stable/reference/generated/numpy.pad.html
+            padding_mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+                ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+                available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
+                One of the listed string values or a user supplied function. Defaults to ``"constant"``.
+                The mode to pad data after zooming.
+                See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+                https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
             align_corners: This only has an effect when mode is
                 'linear', 'bilinear', 'bicubic' or 'trilinear'. Defaults to ``self.align_corners``.
                 See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate
         """
         # match the spatial image dim
         self.randomize()
-        _dtype = np.float32
         if not self._do_transform:
-            return img.astype(_dtype)
+            img_t: torch.Tensor
+            img_t, *_ = convert_data_type(img, dtype=torch.float32)  # type: ignore
+            return img_t
         if len(self._zoom) == 1:
             # to keep the spatial shape ratio, use same random zoom factor for all dims
             self._zoom = ensure_tuple_rep(self._zoom[0], img.ndim - 1)
         elif len(self._zoom) == 2 and img.ndim > 3:
             # if 2 zoom factors provided for 3D data, use the first factor for H and W dims, second factor for D dim
             self._zoom = ensure_tuple_rep(self._zoom[0], img.ndim - 2) + ensure_tuple(self._zoom[-1])
-        zoomer = Zoom(self._zoom, keep_size=self.keep_size, **self.np_kwargs)
-        return np.asarray(
-            zoomer(
-                img,
-                mode=look_up_option(mode or self.mode, InterpolateMode),
-                padding_mode=look_up_option(padding_mode or self.padding_mode, NumpyPadMode),
-                align_corners=self.align_corners if align_corners is None else align_corners,
-            ),
-            dtype=_dtype,
+        zoomer = Zoom(
+            self._zoom,
+            keep_size=self.keep_size,
+            mode=look_up_option(mode or self.mode, InterpolateMode),
+            padding_mode=padding_mode or self.padding_mode,
+            align_corners=align_corners or self.align_corners,
+            **self.kwargs,
         )
+        return zoomer(img)
 
 
 class AffineGrid(Transform):
@@ -979,14 +1038,18 @@ class AffineGrid(Transform):
             pixel/voxel relative to the center of the input image. Defaults to no translation.
         scale_params: scale factor for every spatial dims. a tuple of 2 floats for 2D,
             a tuple of 3 floats for 3D. Defaults to `1.0`.
-        as_tensor_output: whether to output tensor instead of numpy array, defaults to True.
-        device: device to store the output grid data.
         affine: If applied, ignore the params (`rotate_params`, etc.) and use the
             supplied matrix. Should be square with each side = num of image spatial
             dimensions + 1.
 
+    .. deprecated:: 0.6.0
+        ``as_tensor_output`` is deprecated.
+
     """
 
+    backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         rotate_params: Optional[Union[Sequence[float], float]] = None,
@@ -995,24 +1058,25 @@ def __init__(
         scale_params: Optional[Union[Sequence[float], float]] = None,
         as_tensor_output: bool = True,
         device: Optional[torch.device] = None,
-        affine: Optional[Union[np.ndarray, torch.Tensor]] = None,
+        affine: Optional[NdarrayOrTensor] = None,
     ) -> None:
         self.rotate_params = rotate_params
         self.shear_params = shear_params
         self.translate_params = translate_params
         self.scale_params = scale_params
-
-        self.as_tensor_output = as_tensor_output
         self.device = device
-
         self.affine = affine
 
     def __call__(
         self,
         spatial_size: Optional[Sequence[int]] = None,
-        grid: Optional[Union[np.ndarray, torch.Tensor]] = None,
-    ) -> Tuple[Union[np.ndarray, torch.Tensor], Union[np.ndarray, torch.Tensor]]:
+        grid: Optional[NdarrayOrTensor] = None,
+    ) -> Tuple[NdarrayOrTensor, NdarrayOrTensor]:
         """
+        The grid can be initialized with a `spatial_size` parameter, or provided directly as `grid`.
+        Therefore, either `spatial_size` or `grid` must be provided.
+        When initialising from `spatial_size`, the backend "torch" will be used.
+
         Args:
             spatial_size: output grid size.
             grid: grid to be transformed. Shape must be (3, H, W) for 2D or (4, H, W, D) for 3D.
@@ -1023,36 +1087,36 @@ def __call__(
         """
         if grid is None:
             if spatial_size is not None:
-                grid = create_grid(spatial_size)
+                grid = create_grid(spatial_size, device=self.device, backend="torch")
             else:
                 raise ValueError("Incompatible values: grid=None and spatial_size=None.")
 
-        affine: Union[torch.Tensor, np.ndarray]
+        _b = TransformBackends.TORCH if isinstance(grid, torch.Tensor) else TransformBackends.NUMPY
+        _device = grid.device if isinstance(grid, torch.Tensor) else self.device
+        affine: NdarrayOrTensor
         if self.affine is None:
             spatial_dims = len(grid.shape) - 1
-            affine = np.eye(spatial_dims + 1)
+            affine = (
+                torch.eye(spatial_dims + 1, device=_device)
+                if _b == TransformBackends.TORCH
+                else np.eye(spatial_dims + 1)
+            )
             if self.rotate_params:
-                affine = affine @ create_rotate(spatial_dims, self.rotate_params)
+                affine = affine @ create_rotate(spatial_dims, self.rotate_params, device=_device, backend=_b)
             if self.shear_params:
-                affine = affine @ create_shear(spatial_dims, self.shear_params)
+                affine = affine @ create_shear(spatial_dims, self.shear_params, device=_device, backend=_b)
             if self.translate_params:
-                affine = affine @ create_translate(spatial_dims, self.translate_params)
+                affine = affine @ create_translate(spatial_dims, self.translate_params, device=_device, backend=_b)
             if self.scale_params:
-                affine = affine @ create_scale(spatial_dims, self.scale_params)
+                affine = affine @ create_scale(spatial_dims, self.scale_params, device=_device, backend=_b)
         else:
             affine = self.affine
 
-        if isinstance(affine, np.ndarray):
-            affine = torch.as_tensor(np.ascontiguousarray(affine))
+        grid, *_ = convert_data_type(grid, torch.Tensor, device=_device, dtype=float)
+        affine, *_ = convert_to_dst_type(affine, grid)
 
-        grid = torch.tensor(grid) if not isinstance(grid, torch.Tensor) else grid.detach().clone()
-        if self.device:
-            affine = affine.to(self.device)
-            grid = grid.to(self.device)
-        grid = (affine.float() @ grid.reshape((grid.shape[0], -1)).float()).reshape([-1] + list(grid.shape[1:]))
-        if grid is None or not isinstance(grid, torch.Tensor):
-            raise ValueError("Unknown grid.")
-        return grid if self.as_tensor_output else np.asarray(grid.cpu().numpy()), affine
+        grid = (affine @ grid.reshape((grid.shape[0], -1))).reshape([-1] + list(grid.shape[1:]))
+        return grid, affine
 
 
 class RandAffineGrid(Randomizable, Transform):
@@ -1061,6 +1125,9 @@ class RandAffineGrid(Randomizable, Transform):
 
     """
 
+    backend = AffineGrid.backend
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         rotate_range: RandRange = None,
@@ -1094,8 +1161,6 @@ def __init__(
             scale_range: scaling range with format matching `rotate_range`. it defines the range to randomly select
                 the scale factor to translate for every spatial dims. A value of 1.0 is added to the result.
                 This allows 0 to correspond to no change (i.e., a scaling of 1.0).
-            as_tensor_output: whether to output tensor instead of numpy array.
-                defaults to True.
             device: device to store the output grid data.
 
         See also:
@@ -1103,6 +1168,10 @@ def __init__(
             - :py:meth:`monai.transforms.utils.create_shear`
             - :py:meth:`monai.transforms.utils.create_translate`
             - :py:meth:`monai.transforms.utils.create_scale`
+
+        .. deprecated:: 0.6.0
+            ``as_tensor_output`` is deprecated.
+
         """
         self.rotate_range = ensure_tuple(rotate_range)
         self.shear_range = ensure_tuple(shear_range)
@@ -1114,9 +1183,8 @@ def __init__(
         self.translate_params: Optional[List[float]] = None
         self.scale_params: Optional[List[float]] = None
 
-        self.as_tensor_output = as_tensor_output
         self.device = device
-        self.affine: Optional[Union[np.ndarray, torch.Tensor]] = None
+        self.affine: Optional[NdarrayOrTensor] = None
 
     def _get_rand_param(self, param_range, add_scalar: float = 0.0):
         out_param = []
@@ -1138,8 +1206,8 @@ def randomize(self, data: Optional[Any] = None) -> None:
     def __call__(
         self,
         spatial_size: Optional[Sequence[int]] = None,
-        grid: Optional[Union[np.ndarray, torch.Tensor]] = None,
-    ) -> Union[np.ndarray, torch.Tensor]:
+        grid: Optional[NdarrayOrTensor] = None,
+    ) -> NdarrayOrTensor:
         """
         Args:
             spatial_size: output grid size.
@@ -1154,13 +1222,13 @@ def __call__(
             shear_params=self.shear_params,
             translate_params=self.translate_params,
             scale_params=self.scale_params,
-            as_tensor_output=self.as_tensor_output,
             device=self.device,
         )
-        grid, self.affine = affine_grid(spatial_size, grid)
-        return grid
+        _grid: NdarrayOrTensor
+        _grid, self.affine = affine_grid(spatial_size, grid)
+        return _grid
 
-    def get_transformation_matrix(self) -> Optional[Union[np.ndarray, torch.Tensor]]:
+    def get_transformation_matrix(self) -> Optional[NdarrayOrTensor]:
         """Get the most recently applied transformation matrix"""
         return self.affine
 
@@ -1170,6 +1238,8 @@ class RandDeformGrid(Randomizable, Transform):
     Generate random deformation grid.
     """
 
+    backend = [TransformBackends.TORCH]
+
     def __init__(
         self,
         spacing: Union[Sequence[float], float],
@@ -1207,20 +1277,25 @@ def __call__(self, spatial_size: Sequence[int]):
             spatial_size: spatial size of the grid.
         """
         self.spacing = fall_back_tuple(self.spacing, (1.0,) * len(spatial_size))
-        control_grid = create_control_grid(spatial_size, self.spacing)
+        control_grid = create_control_grid(spatial_size, self.spacing, device=self.device, backend="torch")
         self.randomize(control_grid.shape[1:])
-        control_grid[: len(spatial_size)] += self.rand_mag * self.random_offset
-        if self.as_tensor_output:
-            control_grid = torch.as_tensor(np.ascontiguousarray(control_grid), device=self.device)
+        _offset, *_ = convert_to_dst_type(self.rand_mag * self.random_offset, control_grid)
+        control_grid[: len(spatial_size)] += _offset
+        if not self.as_tensor_output:
+            control_grid, *_ = convert_data_type(control_grid, output_type=np.ndarray, dtype=np.float32)
         return control_grid
 
 
 class Resample(Transform):
+
+    backend = [TransformBackends.TORCH]
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         mode: Union[GridSampleMode, str] = GridSampleMode.BILINEAR,
         padding_mode: Union[GridSamplePadMode, str] = GridSamplePadMode.BORDER,
-        as_tensor_output: bool = False,
+        as_tensor_output: bool = True,
         device: Optional[torch.device] = None,
     ) -> None:
         """
@@ -1234,21 +1309,23 @@ def __init__(
             padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``}
                 Padding mode for outside grid values. Defaults to ``"border"``.
                 See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample
-            as_tensor_output: whether to return a torch tensor. Defaults to False.
             device: device on which the tensor will be allocated.
+
+        .. deprecated:: 0.6.0
+            ``as_tensor_output`` is deprecated.
+
         """
         self.mode: GridSampleMode = look_up_option(mode, GridSampleMode)
         self.padding_mode: GridSamplePadMode = look_up_option(padding_mode, GridSamplePadMode)
-        self.as_tensor_output = as_tensor_output
         self.device = device
 
     def __call__(
         self,
-        img: Union[np.ndarray, torch.Tensor],
-        grid: Optional[Union[np.ndarray, torch.Tensor]] = None,
+        img: NdarrayOrTensor,
+        grid: Optional[NdarrayOrTensor] = None,
         mode: Optional[Union[GridSampleMode, str]] = None,
         padding_mode: Optional[Union[GridSamplePadMode, str]] = None,
-    ) -> Union[np.ndarray, torch.Tensor]:
+    ) -> NdarrayOrTensor:
         """
         Args:
             img: shape must be (num_channels, H, W[, D]).
@@ -1260,18 +1337,15 @@ def __call__(
                 Padding mode for outside grid values. Defaults to ``self.padding_mode``.
                 See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample
         """
-
-        if not isinstance(img, torch.Tensor):
-            img = torch.as_tensor(np.ascontiguousarray(img))
         if grid is None:
-            raise AssertionError("Error, grid argument must be supplied as an ndarray or tensor ")
-        grid = torch.tensor(grid) if not isinstance(grid, torch.Tensor) else grid.detach().clone()
-        if self.device:
-            img = img.to(self.device)
-            grid = grid.to(self.device)
+            raise ValueError("Unknown grid.")
+        _device = img.device if isinstance(img, torch.Tensor) else self.device
+        img_t: torch.Tensor
+        img_t, *_ = convert_data_type(img, torch.Tensor, device=_device, dtype=torch.float32)  # type: ignore
+        grid, *_ = convert_to_dst_type(grid, img_t)
 
         if USE_COMPILED:
-            for i, dim in enumerate(img.shape[1:]):
+            for i, dim in enumerate(img_t.shape[1:]):
                 grid[i] += (dim - 1.0) / 2.0
             grid = grid[:-1] / grid[-1:]
             grid = grid.permute(list(range(grid.ndimension()))[1:] + [0])
@@ -1286,29 +1360,29 @@ def __call__(
                 bound = 1
             _interp_mode = look_up_option(self.mode if mode is None else mode, GridSampleMode).value
             out = grid_pull(
-                img.unsqueeze(0).float(),
-                grid.unsqueeze(0).float(),
+                img_t.unsqueeze(0),
+                grid.unsqueeze(0),
                 bound=bound,
                 extrapolate=True,
                 interpolation=1 if _interp_mode == "bilinear" else _interp_mode,
             )[0]
         else:
-            for i, dim in enumerate(img.shape[1:]):
+            for i, dim in enumerate(img_t.shape[1:]):
                 grid[i] = 2.0 * grid[i] / (dim - 1.0)
             grid = grid[:-1] / grid[-1:]
-            index_ordering: List[int] = list(range(img.ndimension() - 2, -1, -1))
+            index_ordering: List[int] = list(range(img_t.ndimension() - 2, -1, -1))
             grid = grid[index_ordering]
             grid = grid.permute(list(range(grid.ndimension()))[1:] + [0])
             out = torch.nn.functional.grid_sample(
-                img.unsqueeze(0).float(),
-                grid.unsqueeze(0).float(),
+                img_t.unsqueeze(0),
+                grid.unsqueeze(0),
                 mode=self.mode.value if mode is None else GridSampleMode(mode).value,
                 padding_mode=self.padding_mode.value if padding_mode is None else GridSamplePadMode(padding_mode).value,
                 align_corners=True,
             )[0]
-        if self.as_tensor_output:
-            return torch.as_tensor(out)
-        return np.asarray(out.cpu().numpy())
+        out_val: NdarrayOrTensor
+        out_val, *_ = convert_to_dst_type(out, dst=img, dtype=out.dtype)
+        return out_val
 
 
 class Affine(Transform):
@@ -1318,6 +1392,9 @@ class Affine(Transform):
 
     """
 
+    backend = list(set(AffineGrid.backend) & set(Resample.backend))
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         rotate_params: Optional[Union[Sequence[float], float]] = None,
@@ -1327,7 +1404,7 @@ def __init__(
         spatial_size: Optional[Union[Sequence[int], int]] = None,
         mode: Union[GridSampleMode, str] = GridSampleMode.BILINEAR,
         padding_mode: Union[GridSamplePadMode, str] = GridSamplePadMode.REFLECTION,
-        as_tensor_output: bool = False,
+        as_tensor_output: bool = True,
         device: Optional[torch.device] = None,
         image_only: bool = False,
     ) -> None:
@@ -1363,32 +1440,33 @@ def __init__(
             padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``}
                 Padding mode for outside grid values. Defaults to ``"reflection"``.
                 See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample
-            as_tensor_output: the computation is implemented using pytorch tensors, this option specifies
-                whether to convert it back to numpy arrays.
             device: device on which the tensor will be allocated.
             image_only: if True return only the image volume, otherwise return (image, affine).
+
+        .. deprecated:: 0.6.0
+            ``as_tensor_output`` is deprecated.
+
         """
         self.affine_grid = AffineGrid(
             rotate_params=rotate_params,
             shear_params=shear_params,
             translate_params=translate_params,
             scale_params=scale_params,
-            as_tensor_output=True,
             device=device,
         )
         self.image_only = image_only
-        self.resampler = Resample(as_tensor_output=as_tensor_output, device=device)
+        self.resampler = Resample(device=device)
         self.spatial_size = spatial_size
         self.mode: GridSampleMode = look_up_option(mode, GridSampleMode)
         self.padding_mode: GridSamplePadMode = look_up_option(padding_mode, GridSamplePadMode)
 
     def __call__(
         self,
-        img: Union[np.ndarray, torch.Tensor],
+        img: NdarrayOrTensor,
         spatial_size: Optional[Union[Sequence[int], int]] = None,
         mode: Optional[Union[GridSampleMode, str]] = None,
         padding_mode: Optional[Union[GridSamplePadMode, str]] = None,
-    ):
+    ) -> Union[NdarrayOrTensor, Tuple[NdarrayOrTensor, NdarrayOrTensor]]:
         """
         Args:
             img: shape must be (num_channels, H, W[, D]),
@@ -1418,6 +1496,9 @@ class RandAffine(RandomizableTransform):
 
     """
 
+    backend = Affine.backend
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         prob: float = 0.1,
@@ -1473,13 +1554,15 @@ def __init__(
             cache_grid: whether to cache the identity sampling grid.
                 If the spatial size is not dynamically defined by input image, enabling this option could
                 accelerate the transform.
-            as_tensor_output: the computation is implemented using pytorch tensors, this option specifies
-                whether to convert it back to numpy arrays.
             device: device on which the tensor will be allocated.
 
         See also:
             - :py:class:`RandAffineGrid` for the random affine parameters configurations.
             - :py:class:`Affine` for the affine transformation parameters configurations.
+
+        .. deprecated:: 0.6.0
+            ``as_tensor_output`` is deprecated.
+
         """
         RandomizableTransform.__init__(self, prob)
 
@@ -1488,10 +1571,9 @@ def __init__(
             shear_range=shear_range,
             translate_range=translate_range,
             scale_range=scale_range,
-            as_tensor_output=True,
             device=device,
         )
-        self.resampler = Resample(as_tensor_output=as_tensor_output, device=device)
+        self.resampler = Resample(device=device)
 
         self.spatial_size = spatial_size
         self.cache_grid = cache_grid
@@ -1519,7 +1601,7 @@ def _init_identity_cache(self):
                     f"'spatial_size={self.spatial_size}', please specify 'spatial_size'."
                 )
             return None
-        return torch.tensor(create_grid(spatial_size=_sp_size)).to(self.rand_affine_grid.device)
+        return create_grid(spatial_size=_sp_size, device=self.rand_affine_grid.device, backend="torch")
 
     def get_identity_grid(self, spatial_size: Sequence[int]):
         """
@@ -1533,7 +1615,11 @@ def get_identity_grid(self, spatial_size: Sequence[int]):
             spatial_size, [2] * ndim
         ):
             raise RuntimeError(f"spatial_size should not be dynamic, got {spatial_size}.")
-        return create_grid(spatial_size=spatial_size) if self._cached_grid is None else self._cached_grid
+        return (
+            create_grid(spatial_size=spatial_size, device=self.rand_affine_grid.device, backend="torch")
+            if self._cached_grid is None
+            else self._cached_grid
+        )
 
     def set_random_state(
         self, seed: Optional[int] = None, state: Optional[np.random.RandomState] = None
@@ -1548,11 +1634,11 @@ def randomize(self, data: Optional[Any] = None) -> None:
 
     def __call__(
         self,
-        img: Union[np.ndarray, torch.Tensor],
+        img: NdarrayOrTensor,
         spatial_size: Optional[Union[Sequence[int], int]] = None,
         mode: Optional[Union[GridSampleMode, str]] = None,
         padding_mode: Optional[Union[GridSamplePadMode, str]] = None,
-    ) -> Union[np.ndarray, torch.Tensor]:
+    ) -> NdarrayOrTensor:
         """
         Args:
             img: shape must be (num_channels, H, W[, D]),
@@ -1570,18 +1656,18 @@ def __call__(
         """
         self.randomize()
         # if not doing transform and spatial size doesn't change, nothing to do
-        # except convert to float and convert numpy/torch
+        # except convert to float and device
         sp_size = fall_back_tuple(spatial_size or self.spatial_size, img.shape[1:])
         do_resampling = self._do_transform or (sp_size != ensure_tuple(img.shape[1:]))
         if not do_resampling:
-            img = img.float() if isinstance(img, torch.Tensor) else img.astype("float32")
-            return torch.Tensor(img) if self.resampler.as_tensor_output else np.array(img)
+            img, *_ = convert_data_type(img, dtype=torch.float32, device=self.resampler.device)
         grid = self.get_identity_grid(sp_size)
         if self._do_transform:
             grid = self.rand_affine_grid(grid=grid)
-        return self.resampler(
+        out: NdarrayOrTensor = self.resampler(
             img=img, grid=grid, mode=mode or self.mode, padding_mode=padding_mode or self.padding_mode
         )
+        return out
 
 
 class Rand2DElastic(RandomizableTransform):
@@ -1591,6 +1677,9 @@ class Rand2DElastic(RandomizableTransform):
 
     """
 
+    backend = Resample.backend
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         spacing: Union[Tuple[float, float], float],
@@ -1645,13 +1734,15 @@ def __init__(
             padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``}
                 Padding mode for outside grid values. Defaults to ``"reflection"``.
                 See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample
-            as_tensor_output: the computation is implemented using pytorch tensors, this option specifies
-                whether to convert it back to numpy arrays.
             device: device on which the tensor will be allocated.
 
         See also:
             - :py:class:`RandAffineGrid` for the random affine parameters configurations.
             - :py:class:`Affine` for the affine transformation parameters configurations.
+
+        .. deprecated:: 0.6.0
+            ``as_tensor_output`` is deprecated.
+
         """
         RandomizableTransform.__init__(self, prob)
         self.deform_grid = RandDeformGrid(
@@ -1662,11 +1753,11 @@ def __init__(
             shear_range=shear_range,
             translate_range=translate_range,
             scale_range=scale_range,
-            as_tensor_output=True,
             device=device,
         )
-        self.resampler = Resample(as_tensor_output=as_tensor_output, device=device)
+        self.resampler = Resample(device=device)
 
+        self.device = device
         self.spatial_size = spatial_size
         self.mode: GridSampleMode = look_up_option(mode, GridSampleMode)
         self.padding_mode: GridSamplePadMode = look_up_option(padding_mode, GridSamplePadMode)
@@ -1686,11 +1777,11 @@ def randomize(self, spatial_size: Sequence[int]) -> None:
 
     def __call__(
         self,
-        img: Union[np.ndarray, torch.Tensor],
+        img: NdarrayOrTensor,
         spatial_size: Optional[Union[Tuple[int, int], int]] = None,
         mode: Optional[Union[GridSampleMode, str]] = None,
         padding_mode: Optional[Union[GridSamplePadMode, str]] = None,
-    ) -> Union[np.ndarray, torch.Tensor]:
+    ) -> NdarrayOrTensor:
         """
         Args:
             img: shape must be (num_channels, H, W),
@@ -1711,15 +1802,19 @@ def __call__(
             grid = self.rand_affine_grid(grid=grid)
             grid = torch.nn.functional.interpolate(  # type: ignore
                 recompute_scale_factor=True,
-                input=torch.as_tensor(grid).unsqueeze(0),
+                input=grid.unsqueeze(0),
                 scale_factor=list(ensure_tuple(self.deform_grid.spacing)),
                 mode=InterpolateMode.BICUBIC.value,
                 align_corners=False,
             )
             grid = CenterSpatialCrop(roi_size=sp_size)(grid[0])
         else:
-            grid = create_grid(spatial_size=sp_size)
-        return self.resampler(img, grid, mode=mode or self.mode, padding_mode=padding_mode or self.padding_mode)
+            _device = img.device if isinstance(img, torch.Tensor) else self.device
+            grid = create_grid(spatial_size=sp_size, device=_device, backend="torch")
+        out: NdarrayOrTensor = self.resampler(
+            img, grid, mode=mode or self.mode, padding_mode=padding_mode or self.padding_mode
+        )
+        return out
 
 
 class Rand3DElastic(RandomizableTransform):
@@ -1729,6 +1824,9 @@ class Rand3DElastic(RandomizableTransform):
 
     """
 
+    backend = Resample.backend
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         sigma_range: Tuple[float, float],
@@ -1786,17 +1884,25 @@ def __init__(
             padding_mode: {``"zeros"``, ``"border"``, ``"reflection"``}
                 Padding mode for outside grid values. Defaults to ``"reflection"``.
                 See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample
-            as_tensor_output: the computation is implemented using pytorch tensors, this option specifies
-                whether to convert it back to numpy arrays.
             device: device on which the tensor will be allocated.
 
         See also:
             - :py:class:`RandAffineGrid` for the random affine parameters configurations.
             - :py:class:`Affine` for the affine transformation parameters configurations.
+
+        .. deprecated:: 0.6.0
+            ``as_tensor_output`` is deprecated.
+
         """
         RandomizableTransform.__init__(self, prob)
-        self.rand_affine_grid = RandAffineGrid(rotate_range, shear_range, translate_range, scale_range, True, device)
-        self.resampler = Resample(as_tensor_output=as_tensor_output, device=device)
+        self.rand_affine_grid = RandAffineGrid(
+            rotate_range=rotate_range,
+            shear_range=shear_range,
+            translate_range=translate_range,
+            scale_range=scale_range,
+            device=device,
+        )
+        self.resampler = Resample(device=device)
 
         self.sigma_range = sigma_range
         self.magnitude_range = magnitude_range
@@ -1826,11 +1932,11 @@ def randomize(self, grid_size: Sequence[int]) -> None:
 
     def __call__(
         self,
-        img: Union[np.ndarray, torch.Tensor],
+        img: NdarrayOrTensor,
         spatial_size: Optional[Union[Tuple[int, int, int], int]] = None,
         mode: Optional[Union[GridSampleMode, str]] = None,
         padding_mode: Optional[Union[GridSamplePadMode, str]] = None,
-    ) -> Union[np.ndarray, torch.Tensor]:
+    ) -> NdarrayOrTensor:
         """
         Args:
             img: shape must be (num_channels, H, W, D),
@@ -1846,16 +1952,19 @@ def __call__(
         """
         sp_size = fall_back_tuple(spatial_size or self.spatial_size, img.shape[1:])
         self.randomize(grid_size=sp_size)
-        grid = create_grid(spatial_size=sp_size)
+        _device = img.device if isinstance(img, torch.Tensor) else self.device
+        grid = create_grid(spatial_size=sp_size, device=_device, backend="torch")
         if self._do_transform:
             if self.rand_offset is None:
-                raise AssertionError
-            grid = torch.as_tensor(np.ascontiguousarray(grid), device=self.device)
-            gaussian = GaussianFilter(3, self.sigma, 3.0).to(device=self.device)
-            offset = torch.as_tensor(self.rand_offset, device=self.device).unsqueeze(0)
+                raise RuntimeError("rand_offset is not initialized.")
+            gaussian = GaussianFilter(3, self.sigma, 3.0).to(device=_device)
+            offset = torch.as_tensor(self.rand_offset, device=_device).unsqueeze(0)
             grid[:3] += gaussian(offset)[0] * self.magnitude
             grid = self.rand_affine_grid(grid=grid)
-        return self.resampler(img, grid, mode=mode or self.mode, padding_mode=padding_mode or self.padding_mode)
+        out: NdarrayOrTensor = self.resampler(
+            img, grid, mode=mode or self.mode, padding_mode=padding_mode or self.padding_mode
+        )
+        return out
 
 
 class AddCoordinateChannels(Transform):
@@ -1885,6 +1994,7 @@ def __call__(self, img: Union[np.ndarray, torch.Tensor]):
         Args:
             img: data to be transformed, assuming `img` is channel first.
         """
+        img, *_ = convert_data_type(img, np.ndarray)  # type: ignore
         if max(self.spatial_channels) > img.ndim - 1:
             raise ValueError(
                 f"input has {img.ndim-1} spatial dimensions, cannot add AddCoordinateChannels channel for "
diff --git a/monai/transforms/spatial/dictionary.py b/monai/transforms/spatial/dictionary.py
index b0558a6556..bd10aad8f7 100644
--- a/monai/transforms/spatial/dictionary.py
+++ b/monai/transforms/spatial/dictionary.py
@@ -50,12 +50,15 @@
     GridSamplePadMode,
     InterpolateMode,
     NumpyPadMode,
+    PytorchPadMode,
     ensure_tuple,
     ensure_tuple_rep,
     fall_back_tuple,
 )
+from monai.utils.deprecated import deprecated_arg
 from monai.utils.enums import InverseKeys
 from monai.utils.module import optional_import
+from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
 nib, _ = optional_import("nibabel")
 
@@ -115,7 +118,7 @@
 GridSampleModeSequence = Union[Sequence[Union[GridSampleMode, str]], GridSampleMode, str]
 GridSamplePadModeSequence = Union[Sequence[Union[GridSamplePadMode, str]], GridSamplePadMode, str]
 InterpolateModeSequence = Union[Sequence[Union[InterpolateMode, str]], InterpolateMode, str]
-NumpyPadModeSequence = Union[Sequence[Union[NumpyPadMode, str]], NumpyPadMode, str]
+PadModeSequence = Union[Sequence[Union[NumpyPadMode, PytorchPadMode, str]], NumpyPadMode, PytorchPadMode, str]
 
 
 class Spacingd(MapTransform, InvertibleTransform):
@@ -132,6 +135,8 @@ class Spacingd(MapTransform, InvertibleTransform):
         :py:class:`monai.transforms.Spacing`
     """
 
+    backend = Spacing.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -208,8 +213,8 @@ def __init__(
         self.meta_key_postfix = ensure_tuple_rep(meta_key_postfix, len(self.keys))
 
     def __call__(
-        self, data: Mapping[Union[Hashable, str], Dict[str, np.ndarray]]
-    ) -> Dict[Union[Hashable, str], Union[np.ndarray, Dict[str, np.ndarray]]]:
+        self, data: Mapping[Union[Hashable, str], Dict[str, NdarrayOrTensor]]
+    ) -> Dict[Union[Hashable, str], Union[NdarrayOrTensor, Dict[str, NdarrayOrTensor]]]:
         d: Dict = dict(data)
         for key, mode, padding_mode, align_corners, dtype, meta_key, meta_key_postfix in self.key_iterator(
             d, self.mode, self.padding_mode, self.align_corners, self.dtype, self.meta_keys, self.meta_key_postfix
@@ -223,7 +228,7 @@ def __call__(
             # using affine fetched from d[affine_key]
             original_spatial_shape = d[key].shape[1:]
             d[key], old_affine, new_affine = self.spacing_transform(
-                data_array=np.asarray(d[key]),
+                data_array=d[key],
                 affine=meta_data["affine"],
                 mode=mode,
                 padding_mode=padding_mode,
@@ -246,7 +251,7 @@ def __call__(
             meta_data["affine"] = new_affine
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
         for key, dtype in self.key_iterator(d, self.dtype):
             transform = self.get_most_recent_transform(d, key)
@@ -266,15 +271,15 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar
             inverse_transform = Spacing(orig_pixdim, diagonal=self.spacing_transform.diagonal)
             # Apply inverse
             d[key], _, new_affine = inverse_transform(
-                data_array=np.asarray(d[key]),
-                affine=meta_data["affine"],
+                data_array=d[key],
+                affine=meta_data["affine"],  # type: ignore
                 mode=mode,
                 padding_mode=padding_mode,
                 align_corners=False if align_corners == "none" else align_corners,
                 dtype=dtype,
                 output_spatial_shape=orig_size,
             )
-            meta_data["affine"] = new_affine
+            meta_data["affine"] = new_affine  # type: ignore
             # Remove the applied transform
             self.pop_transform(d, key)
 
@@ -382,6 +387,8 @@ class Rotate90d(MapTransform, InvertibleTransform):
     Dictionary-based wrapper of :py:class:`monai.transforms.Rotate90`.
     """
 
+    backend = Rotate90.backend
+
     def __init__(
         self, keys: KeysCollection, k: int = 1, spatial_axes: Tuple[int, int] = (0, 1), allow_missing_keys: bool = False
     ) -> None:
@@ -395,14 +402,14 @@ def __init__(
         super().__init__(keys, allow_missing_keys)
         self.rotator = Rotate90(k, spatial_axes)
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key in self.key_iterator(d):
             self.push_transform(d, key)
             d[key] = self.rotator(d[key])
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
         for key in self.key_iterator(d):
             _ = self.get_most_recent_transform(d, key)
@@ -411,9 +418,6 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar
             num_times_rotated = self.rotator.k
             num_times_to_rotate = 4 - num_times_rotated
             inverse_transform = Rotate90(num_times_to_rotate, spatial_axes)
-            # Might need to convert to numpy
-            if isinstance(d[key], torch.Tensor):
-                d[key] = torch.Tensor(d[key]).cpu().numpy()
             # Apply inverse
             d[key] = inverse_transform(d[key])
             # Remove the applied transform
@@ -429,6 +433,8 @@ class RandRotate90d(RandomizableTransform, MapTransform, InvertibleTransform):
     in the plane specified by `spatial_axes`.
     """
 
+    backend = Rotate90.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -461,7 +467,7 @@ def randomize(self, data: Optional[Any] = None) -> None:
         self._rand_k = self.R.randint(self.max_k) + 1
         super().randomize(None)
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Mapping[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Mapping[Hashable, NdarrayOrTensor]:
         self.randomize()
         d = dict(data)
 
@@ -472,7 +478,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Mapping[Hashable, np.
             self.push_transform(d, key, extra_info={"rand_k": self._rand_k})
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
         for key in self.key_iterator(d):
             transform = self.get_most_recent_transform(d, key)
@@ -482,9 +488,6 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar
                 num_times_rotated = transform[InverseKeys.EXTRA_INFO]["rand_k"]
                 num_times_to_rotate = 4 - num_times_rotated
                 inverse_transform = Rotate90(num_times_to_rotate, self.spatial_axes)
-                # Might need to convert to numpy
-                if isinstance(d[key], torch.Tensor):
-                    d[key] = torch.Tensor(d[key]).cpu().numpy()
                 # Apply inverse
                 d[key] = inverse_transform(d[key])
             # Remove the applied transform
@@ -520,6 +523,8 @@ class Resized(MapTransform, InvertibleTransform):
         allow_missing_keys: don't raise exception if key is missing.
     """
 
+    backend = Resize.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -534,7 +539,7 @@ def __init__(
         self.align_corners = ensure_tuple_rep(align_corners, len(self.keys))
         self.resizer = Resize(spatial_size=spatial_size, size_mode=size_mode)
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key, mode, align_corners in self.key_iterator(d, self.mode, self.align_corners):
             self.push_transform(
@@ -548,7 +553,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda
             d[key] = self.resizer(d[key], mode=mode, align_corners=align_corners)
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
         for key in self.key_iterator(d):
             transform = self.get_most_recent_transform(d, key)
@@ -574,6 +579,9 @@ class Affined(MapTransform, InvertibleTransform):
     Dictionary-based wrapper of :py:class:`monai.transforms.Affine`.
     """
 
+    backend = Affine.backend
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         keys: KeysCollection,
@@ -584,7 +592,7 @@ def __init__(
         spatial_size: Optional[Union[Sequence[int], int]] = None,
         mode: GridSampleModeSequence = GridSampleMode.BILINEAR,
         padding_mode: GridSamplePadModeSequence = GridSamplePadMode.REFLECTION,
-        as_tensor_output: bool = False,
+        as_tensor_output: bool = True,
         device: Optional[torch.device] = None,
         allow_missing_keys: bool = False,
     ) -> None:
@@ -621,14 +629,16 @@ def __init__(
                 Padding mode for outside grid values. Defaults to ``"reflection"``.
                 See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample
                 It also can be a sequence of string, each element corresponds to a key in ``keys``.
-            as_tensor_output: the computation is implemented using pytorch tensors, this option specifies
-                whether to convert it back to numpy arrays.
             device: device on which the tensor will be allocated.
             allow_missing_keys: don't raise exception if key is missing.
 
         See also:
             - :py:class:`monai.transforms.compose.MapTransform`
             - :py:class:`RandAffineGrid` for the random affine parameters configurations.
+
+        .. deprecated:: 0.6.0
+            ``as_tensor_output`` is deprecated.
+
         """
         MapTransform.__init__(self, keys, allow_missing_keys)
         self.affine = Affine(
@@ -637,15 +647,12 @@ def __init__(
             translate_params=translate_params,
             scale_params=scale_params,
             spatial_size=spatial_size,
-            as_tensor_output=as_tensor_output,
             device=device,
         )
         self.mode = ensure_tuple_rep(mode, len(self.keys))
         self.padding_mode = ensure_tuple_rep(padding_mode, len(self.keys))
 
-    def __call__(
-        self, data: Mapping[Hashable, Union[np.ndarray, torch.Tensor]]
-    ) -> Dict[Hashable, Union[np.ndarray, torch.Tensor]]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key, mode, padding_mode in self.key_iterator(d, self.mode, self.padding_mode):
             orig_size = d[key].shape[1:]
@@ -662,7 +669,7 @@ def __call__(
             )
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
 
         for key in self.key_iterator(d):
@@ -678,10 +685,7 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar
             grid, _ = affine_grid(orig_size)  # type: ignore
 
             # Apply inverse transform
-            out = self.affine.resampler(d[key], grid, mode, padding_mode)
-
-            # Convert to numpy
-            d[key] = out if isinstance(out, np.ndarray) else out.cpu().numpy()
+            d[key] = self.affine.resampler(d[key], grid, mode, padding_mode)
 
             # Remove the applied transform
             self.pop_transform(d, key)
@@ -694,6 +698,9 @@ class RandAffined(RandomizableTransform, MapTransform, InvertibleTransform):
     Dictionary-based wrapper of :py:class:`monai.transforms.RandAffine`.
     """
 
+    backend = Affine.backend
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         keys: KeysCollection,
@@ -754,14 +761,16 @@ def __init__(
             cache_grid: whether to cache the identity sampling grid.
                 If the spatial size is not dynamically defined by input image, enabling this option could
                 accelerate the transform.
-            as_tensor_output: the computation is implemented using pytorch tensors, this option specifies
-                whether to convert it back to numpy arrays.
             device: device on which the tensor will be allocated.
             allow_missing_keys: don't raise exception if key is missing.
 
         See also:
             - :py:class:`monai.transforms.compose.MapTransform`
             - :py:class:`RandAffineGrid` for the random affine parameters configurations.
+
+        .. deprecated:: 0.6.0
+            ``as_tensor_output`` is deprecated.
+
         """
         MapTransform.__init__(self, keys, allow_missing_keys)
         RandomizableTransform.__init__(self, prob)
@@ -773,7 +782,6 @@ def __init__(
             scale_range=scale_range,
             spatial_size=spatial_size,
             cache_grid=cache_grid,
-            as_tensor_output=as_tensor_output,
             device=device,
         )
         self.mode = ensure_tuple_rep(mode, len(self.keys))
@@ -790,18 +798,17 @@ def randomize(self, data: Optional[Any] = None) -> None:
         super().randomize(None)
         self.rand_affine.randomize()
 
-    def __call__(
-        self, data: Mapping[Hashable, Union[np.ndarray, torch.Tensor]]
-    ) -> Dict[Hashable, Union[np.ndarray, torch.Tensor]]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         self.randomize()
 
+        device = self.rand_affine.resampler.device
+
         sp_size = fall_back_tuple(self.rand_affine.spatial_size, data[self.keys[0]].shape[1:])
         # change image size or do random transform
         do_resampling = self._do_transform or (sp_size != ensure_tuple(data[self.keys[0]].shape[1:]))
-
-        # to be consistent with the self._do_transform case (dtype and device)
-        affine = torch.as_tensor(np.eye(len(sp_size) + 1), device=self.rand_affine.rand_affine_grid.device)
+        affine: torch.Tensor = torch.eye(len(sp_size) + 1, dtype=torch.float64, device=device)
+        # converting affine to tensor because the resampler currently only support torch backend
         grid = None
         if do_resampling:  # need to prepare grid
             grid = self.rand_affine.get_identity_grid(sp_size)
@@ -822,24 +829,16 @@ def __call__(
             # do the transform
             if do_resampling:
                 d[key] = self.rand_affine.resampler(d[key], grid, mode=mode, padding_mode=padding_mode)
-            # if not doing transform and and spatial size is unchanged, only need to do numpy/torch conversion
-            else:
-                if self.rand_affine.resampler.as_tensor_output and not isinstance(d[key], torch.Tensor):
-                    d[key] = torch.Tensor(d[key])
-                elif not self.rand_affine.resampler.as_tensor_output and isinstance(d[key], torch.Tensor):
-                    d[key] = d[key].detach().cpu().numpy()  # type: ignore[union-attr]
 
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
 
         for key in self.key_iterator(d):
             transform = self.get_most_recent_transform(d, key)
             # if transform was not performed and spatial size is None, nothing to do.
-            if not transform[InverseKeys.DO_TRANSFORM] and self.rand_affine.spatial_size is None:
-                out: Union[np.ndarray, torch.Tensor] = d[key]
-            else:
+            if transform[InverseKeys.DO_TRANSFORM] or self.rand_affine.spatial_size is not None:
                 orig_size = transform[InverseKeys.ORIG_SIZE]
                 # Create inverse transform
                 fwd_affine = transform[InverseKeys.EXTRA_INFO]["affine"]
@@ -851,10 +850,7 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar
                 grid, _ = affine_grid(orig_size)  # type: ignore
 
                 # Apply inverse transform
-                out = self.rand_affine.resampler(d[key], grid, mode, padding_mode)
-
-            # Convert to numpy
-            d[key] = out if isinstance(out, np.ndarray) else out.cpu().numpy()
+                d[key] = self.rand_affine.resampler(d[key], grid, mode, padding_mode)
 
             # Remove the applied transform
             self.pop_transform(d, key)
@@ -867,6 +863,9 @@ class Rand2DElasticd(RandomizableTransform, MapTransform):
     Dictionary-based wrapper of :py:class:`monai.transforms.Rand2DElastic`.
     """
 
+    backend = Rand2DElastic.backend
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         keys: KeysCollection,
@@ -927,14 +926,16 @@ def __init__(
                 Padding mode for outside grid values. Defaults to ``"reflection"``.
                 See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample
                 It also can be a sequence of string, each element corresponds to a key in ``keys``.
-            as_tensor_output: the computation is implemented using pytorch tensors, this option specifies
-                whether to convert it back to numpy arrays.
             device: device on which the tensor will be allocated.
             allow_missing_keys: don't raise exception if key is missing.
 
         See also:
             - :py:class:`RandAffineGrid` for the random affine parameters configurations.
             - :py:class:`Affine` for the affine transformation parameters configurations.
+
+        .. deprecated:: 0.6.0
+            ``as_tensor_output`` is deprecated.
+
         """
         MapTransform.__init__(self, keys, allow_missing_keys)
         RandomizableTransform.__init__(self, prob)
@@ -947,7 +948,6 @@ def __init__(
             translate_range=translate_range,
             scale_range=scale_range,
             spatial_size=spatial_size,
-            as_tensor_output=as_tensor_output,
             device=device,
         )
         self.mode = ensure_tuple_rep(mode, len(self.keys))
@@ -964,9 +964,7 @@ def randomize(self, spatial_size: Sequence[int]) -> None:
         super().randomize(None)
         self.rand_2d_elastic.randomize(spatial_size)
 
-    def __call__(
-        self, data: Mapping[Hashable, Union[np.ndarray, torch.Tensor]]
-    ) -> Dict[Hashable, Union[np.ndarray, torch.Tensor]]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
 
         sp_size = fall_back_tuple(self.rand_2d_elastic.spatial_size, data[self.keys[0]].shape[1:])
@@ -984,7 +982,8 @@ def __call__(
             )
             grid = CenterSpatialCrop(roi_size=sp_size)(grid[0])
         else:
-            grid = create_grid(spatial_size=sp_size)
+            _device = self.rand_2d_elastic.deform_grid.device
+            grid = create_grid(spatial_size=sp_size, device=_device, backend="torch")
 
         for key, mode, padding_mode in self.key_iterator(d, self.mode, self.padding_mode):
             d[key] = self.rand_2d_elastic.resampler(d[key], grid, mode=mode, padding_mode=padding_mode)
@@ -996,6 +995,9 @@ class Rand3DElasticd(RandomizableTransform, MapTransform):
     Dictionary-based wrapper of :py:class:`monai.transforms.Rand3DElastic`.
     """
 
+    backend = Rand3DElastic.backend
+
+    @deprecated_arg(name="as_tensor_output", since="0.6")
     def __init__(
         self,
         keys: KeysCollection,
@@ -1058,14 +1060,16 @@ def __init__(
                 Padding mode for outside grid values. Defaults to ``"reflection"``.
                 See also: https://pytorch.org/docs/stable/nn.functional.html#grid-sample
                 It also can be a sequence of string, each element corresponds to a key in ``keys``.
-            as_tensor_output: the computation is implemented using pytorch tensors, this option specifies
-                whether to convert it back to numpy arrays.
             device: device on which the tensor will be allocated.
             allow_missing_keys: don't raise exception if key is missing.
 
         See also:
             - :py:class:`RandAffineGrid` for the random affine parameters configurations.
             - :py:class:`Affine` for the affine transformation parameters configurations.
+
+        .. deprecated:: 0.6.0
+            ``as_tensor_output`` is deprecated.
+
         """
         MapTransform.__init__(self, keys, allow_missing_keys)
         RandomizableTransform.__init__(self, prob)
@@ -1078,7 +1082,6 @@ def __init__(
             translate_range=translate_range,
             scale_range=scale_range,
             spatial_size=spatial_size,
-            as_tensor_output=as_tensor_output,
             device=device,
         )
         self.mode = ensure_tuple_rep(mode, len(self.keys))
@@ -1095,19 +1098,17 @@ def randomize(self, grid_size: Sequence[int]) -> None:
         super().randomize(None)
         self.rand_3d_elastic.randomize(grid_size)
 
-    def __call__(
-        self, data: Mapping[Hashable, Union[np.ndarray, torch.Tensor]]
-    ) -> Dict[Hashable, Union[np.ndarray, torch.Tensor]]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         sp_size = fall_back_tuple(self.rand_3d_elastic.spatial_size, data[self.keys[0]].shape[1:])
 
         self.randomize(grid_size=sp_size)
-        grid = create_grid(spatial_size=sp_size)
+        _device = self.rand_3d_elastic.device
+        grid = create_grid(spatial_size=sp_size, device=_device, backend="torch")
         if self._do_transform:
             device = self.rand_3d_elastic.device
-            grid = torch.tensor(grid).to(device)
             gaussian = GaussianFilter(spatial_dims=3, sigma=self.rand_3d_elastic.sigma, truncated=3.0).to(device)
-            offset = torch.tensor(self.rand_3d_elastic.rand_offset, device=device).unsqueeze(0)
+            offset = torch.as_tensor(self.rand_3d_elastic.rand_offset, device=device).unsqueeze(0)
             grid[:3] += gaussian(offset)[0] * self.rand_3d_elastic.magnitude
             grid = self.rand_3d_elastic.rand_affine_grid(grid=grid)
 
@@ -1288,6 +1289,8 @@ class Rotated(MapTransform, InvertibleTransform):
         allow_missing_keys: don't raise exception if key is missing.
     """
 
+    backend = Rotate.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -1296,7 +1299,7 @@ def __init__(
         mode: GridSampleModeSequence = GridSampleMode.BILINEAR,
         padding_mode: GridSamplePadModeSequence = GridSamplePadMode.BORDER,
         align_corners: Union[Sequence[bool], bool] = False,
-        dtype: Union[Sequence[DtypeLike], DtypeLike] = np.float64,
+        dtype: Union[Sequence[Union[DtypeLike, torch.dtype]], Union[DtypeLike, torch.dtype]] = np.float64,
         allow_missing_keys: bool = False,
     ) -> None:
         super().__init__(keys, allow_missing_keys)
@@ -1307,7 +1310,7 @@ def __init__(
         self.align_corners = ensure_tuple_rep(align_corners, len(self.keys))
         self.dtype = ensure_tuple_rep(dtype, len(self.keys))
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key, mode, padding_mode, align_corners, dtype in self.key_iterator(
             d, self.mode, self.padding_mode, self.align_corners, self.dtype
@@ -1334,7 +1337,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda
             )
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
         for key, dtype in self.key_iterator(d, self.dtype):
             transform = self.get_most_recent_transform(d, key)
@@ -1352,12 +1355,17 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar
                 align_corners=False if align_corners == "none" else align_corners,
                 reverse_indexing=True,
             )
+            img_t: torch.Tensor
+            img_t, *_ = convert_data_type(d[key], torch.Tensor, dtype=dtype)  # type: ignore
+            transform_t: torch.Tensor
+            transform_t, *_ = convert_to_dst_type(inv_rot_mat, img_t)  # type: ignore
+
             output = xform(
-                torch.as_tensor(np.ascontiguousarray(d[key]).astype(dtype)).unsqueeze(0),
-                torch.as_tensor(np.ascontiguousarray(inv_rot_mat).astype(dtype)),
+                img_t.unsqueeze(0),
+                transform_t,
                 spatial_size=transform[InverseKeys.ORIG_SIZE],
             )
-            d[key] = np.asarray(output.squeeze(0).detach().cpu().numpy(), dtype=np.float32)
+            d[key] = output.squeeze(0).detach().float()
             # Remove the applied transform
             self.pop_transform(d, key)
 
@@ -1399,6 +1407,8 @@ class RandRotated(RandomizableTransform, MapTransform, InvertibleTransform):
         allow_missing_keys: don't raise exception if key is missing.
     """
 
+    backend = Rotate.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -1410,7 +1420,7 @@ def __init__(
         mode: GridSampleModeSequence = GridSampleMode.BILINEAR,
         padding_mode: GridSamplePadModeSequence = GridSamplePadMode.BORDER,
         align_corners: Union[Sequence[bool], bool] = False,
-        dtype: Union[Sequence[DtypeLike], DtypeLike] = np.float64,
+        dtype: Union[Sequence[Union[DtypeLike, torch.dtype]], Union[DtypeLike, torch.dtype]] = np.float64,
         allow_missing_keys: bool = False,
     ) -> None:
         MapTransform.__init__(self, keys, allow_missing_keys)
@@ -1441,14 +1451,11 @@ def randomize(self, data: Optional[Any] = None) -> None:
         self.y = self.R.uniform(low=self.range_y[0], high=self.range_y[1])
         self.z = self.R.uniform(low=self.range_z[0], high=self.range_z[1])
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         self.randomize()
         d = dict(data)
         angle: Union[Sequence[float], float] = self.x if d[self.keys[0]].ndim == 3 else (self.x, self.y, self.z)
-        rotator = Rotate(
-            angle=angle,
-            keep_size=self.keep_size,
-        )
+        rotator = Rotate(angle=angle, keep_size=self.keep_size)
         for key, mode, padding_mode, align_corners, dtype in self.key_iterator(
             d, self.mode, self.padding_mode, self.align_corners, self.dtype
         ):
@@ -1477,7 +1484,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda
             )
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
         for key, dtype in self.key_iterator(d, self.dtype):
             transform = self.get_most_recent_transform(d, key)
@@ -1497,12 +1504,17 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar
                     align_corners=False if align_corners == "none" else align_corners,
                     reverse_indexing=True,
                 )
+                img_t: torch.Tensor
+                img_t, *_ = convert_data_type(d[key], torch.Tensor, dtype=dtype)  # type: ignore
+                transform_t: torch.Tensor
+                transform_t, *_ = convert_to_dst_type(inv_rot_mat, img_t)  # type: ignore
+                output: torch.Tensor
                 output = xform(
-                    torch.as_tensor(np.ascontiguousarray(d[key]).astype(dtype)).unsqueeze(0),
-                    torch.as_tensor(np.ascontiguousarray(inv_rot_mat).astype(dtype)),
+                    img_t.unsqueeze(0),
+                    transform_t,
                     spatial_size=transform[InverseKeys.ORIG_SIZE],
                 )
-                d[key] = np.asarray(output.squeeze(0).detach().cpu().numpy(), dtype=np.float32)
+                d[key] = output.squeeze(0).detach().float()
             # Remove the applied transform
             self.pop_transform(d, key)
 
@@ -1522,39 +1534,44 @@ class Zoomd(MapTransform, InvertibleTransform):
             The interpolation mode. Defaults to ``"area"``.
             See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate
             It also can be a sequence of string, each element corresponds to a key in ``keys``.
-        padding_mode: {``"constant"``, ``"edge``", ``"linear_ramp``", ``"maximum``", ``"mean``", `"median``",
-            ``"minimum``", `"reflect``", ``"symmetric``", ``"wrap``", ``"empty``", ``"<function>``"}
+        padding_mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+            ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+            available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
+            One of the listed string values or a user supplied function. Defaults to ``"constant"``.
             The mode to pad data after zooming.
-            See also: https://numpy.org/doc/stable/reference/generated/numpy.pad.html
+            See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+            https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
         align_corners: This only has an effect when mode is
             'linear', 'bilinear', 'bicubic' or 'trilinear'. Default: None.
             See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate
             It also can be a sequence of bool or None, each element corresponds to a key in ``keys``.
         keep_size: Should keep original size (pad if needed), default is True.
         allow_missing_keys: don't raise exception if key is missing.
-        np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension.
-            more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+        kwargs: other arguments for the `np.pad` or `torch.pad` function.
+            note that `np.pad` treats channel dimension as the first dimension.
 
     """
 
+    backend = Zoom.backend
+
     def __init__(
         self,
         keys: KeysCollection,
         zoom: Union[Sequence[float], float],
         mode: InterpolateModeSequence = InterpolateMode.AREA,
-        padding_mode: NumpyPadModeSequence = NumpyPadMode.EDGE,
+        padding_mode: PadModeSequence = NumpyPadMode.EDGE,
         align_corners: Union[Sequence[Optional[bool]], Optional[bool]] = None,
         keep_size: bool = True,
         allow_missing_keys: bool = False,
-        **np_kwargs,
+        **kwargs,
     ) -> None:
         super().__init__(keys, allow_missing_keys)
         self.mode = ensure_tuple_rep(mode, len(self.keys))
         self.padding_mode = ensure_tuple_rep(padding_mode, len(self.keys))
         self.align_corners = ensure_tuple_rep(align_corners, len(self.keys))
-        self.zoomer = Zoom(zoom=zoom, keep_size=keep_size, **np_kwargs)
+        self.zoomer = Zoom(zoom=zoom, keep_size=keep_size, **kwargs)
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         for key, mode, padding_mode, align_corners in self.key_iterator(
             d, self.mode, self.padding_mode, self.align_corners
@@ -1576,7 +1593,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda
             )
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
         for key in self.key_iterator(d):
             transform = self.get_most_recent_transform(d, key)
@@ -1594,7 +1611,7 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar
                 align_corners=None if align_corners == "none" else align_corners,
             )
             # Size might be out by 1 voxel so pad
-            d[key] = SpatialPad(transform[InverseKeys.ORIG_SIZE], mode="edge")(d[key])
+            d[key] = SpatialPad(transform[InverseKeys.ORIG_SIZE], mode="edge")(d[key])  # type: ignore
             # Remove the applied transform
             self.pop_transform(d, key)
 
@@ -1622,21 +1639,26 @@ class RandZoomd(RandomizableTransform, MapTransform, InvertibleTransform):
             The interpolation mode. Defaults to ``"area"``.
             See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate
             It also can be a sequence of string, each element corresponds to a key in ``keys``.
-        padding_mode: {``"constant"``, ``"edge``", ``"linear_ramp``", ``"maximum``", ``"mean``", `"median``",
-            ``"minimum``", `"reflect``", ``"symmetric``", ``"wrap``", ``"empty``", ``"<function>``"}
+        padding_mode: available modes for numpy array:{``"constant"``, ``"edge"``, ``"linear_ramp"``, ``"maximum"``,
+            ``"mean"``, ``"median"``, ``"minimum"``, ``"reflect"``, ``"symmetric"``, ``"wrap"``, ``"empty"``}
+            available modes for PyTorch Tensor: {``"constant"``, ``"reflect"``, ``"replicate"``, ``"circular"``}.
+            One of the listed string values or a user supplied function. Defaults to ``"constant"``.
             The mode to pad data after zooming.
-            See also: https://numpy.org/doc/stable/reference/generated/numpy.pad.html
+            See also: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
+            https://pytorch.org/docs/stable/generated/torch.nn.functional.pad.html
         align_corners: This only has an effect when mode is
             'linear', 'bilinear', 'bicubic' or 'trilinear'. Default: None.
             See also: https://pytorch.org/docs/stable/nn.functional.html#interpolate
             It also can be a sequence of bool or None, each element corresponds to a key in ``keys``.
         keep_size: Should keep original size (pad if needed), default is True.
         allow_missing_keys: don't raise exception if key is missing.
-        np_kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension.
+        kwargs: other args for `np.pad` API, note that `np.pad` treats channel dimension as the first dimension.
             more details: https://numpy.org/doc/1.18/reference/generated/numpy.pad.html
 
     """
 
+    backend = Zoom.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -1644,11 +1666,11 @@ def __init__(
         min_zoom: Union[Sequence[float], float] = 0.9,
         max_zoom: Union[Sequence[float], float] = 1.1,
         mode: InterpolateModeSequence = InterpolateMode.AREA,
-        padding_mode: NumpyPadModeSequence = NumpyPadMode.EDGE,
+        padding_mode: PadModeSequence = NumpyPadMode.EDGE,
         align_corners: Union[Sequence[Optional[bool]], Optional[bool]] = None,
         keep_size: bool = True,
         allow_missing_keys: bool = False,
-        **np_kwargs,
+        **kwargs,
     ) -> None:
         MapTransform.__init__(self, keys, allow_missing_keys)
         RandomizableTransform.__init__(self, prob)
@@ -1661,7 +1683,7 @@ def __init__(
         self.padding_mode = ensure_tuple_rep(padding_mode, len(self.keys))
         self.align_corners = ensure_tuple_rep(align_corners, len(self.keys))
         self.keep_size = keep_size
-        self.np_kwargs = np_kwargs
+        self.kwargs = kwargs
 
         self._zoom: Sequence[float] = [1.0]
 
@@ -1669,7 +1691,7 @@ def randomize(self, data: Optional[Any] = None) -> None:
         super().randomize(None)
         self._zoom = [self.R.uniform(l, h) for l, h in zip(self.min_zoom, self.max_zoom)]
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         # match the spatial dim of first item
         self.randomize()
         d = dict(data)
@@ -1681,7 +1703,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda
         elif len(self._zoom) == 2 and img_dims > 3:
             # if 2 zoom factors provided for 3D data, use the first factor for H and W dims, second factor for D dim
             self._zoom = ensure_tuple_rep(self._zoom[0], img_dims - 2) + ensure_tuple(self._zoom[-1])
-        zoomer = Zoom(self._zoom, keep_size=self.keep_size, **self.np_kwargs)
+        zoomer = Zoom(self._zoom, keep_size=self.keep_size, **self.kwargs)
         for key, mode, padding_mode, align_corners in self.key_iterator(
             d, self.mode, self.padding_mode, self.align_corners
         ):
@@ -1704,7 +1726,7 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.nda
                 )
         return d
 
-    def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def inverse(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = deepcopy(dict(data))
         for key in self.key_iterator(d):
             transform = self.get_most_recent_transform(d, key)
@@ -1724,7 +1746,7 @@ def inverse(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndar
                     align_corners=None if align_corners == "none" else align_corners,
                 )
                 # Size might be out by 1 voxel so pad
-                d[key] = SpatialPad(transform[InverseKeys.ORIG_SIZE], mode="edge")(d[key])
+                d[key] = SpatialPad(transform[InverseKeys.ORIG_SIZE], mode="edge")(d[key])  # type: ignore
             # Remove the applied transform
             self.pop_transform(d, key)
 
diff --git a/monai/transforms/utility/array.py b/monai/transforms/utility/array.py
index 2eb6c447c6..ffc3b99cb5 100644
--- a/monai/transforms/utility/array.py
+++ b/monai/transforms/utility/array.py
@@ -31,16 +31,25 @@
     map_binary_to_indices,
     map_classes_to_indices,
 )
-from monai.transforms.utils_pytorch_numpy_unification import in1d, moveaxis
-from monai.utils import convert_to_numpy, convert_to_tensor, ensure_tuple, look_up_option, min_version, optional_import
+from monai.transforms.utils_pytorch_numpy_unification import in1d, moveaxis, unravel_indices
+from monai.utils import (
+    convert_data_type,
+    convert_to_cupy,
+    convert_to_numpy,
+    convert_to_tensor,
+    ensure_tuple,
+    get_equivalent_dtype,
+    look_up_option,
+    min_version,
+    optional_import,
+)
 from monai.utils.enums import TransformBackends
 from monai.utils.misc import is_module_ver_at_least
-from monai.utils.type_conversion import convert_data_type
 
 PILImageImage, has_pil = optional_import("PIL.Image", name="Image")
 pil_image_fromarray, _ = optional_import("PIL.Image", name="fromarray")
 cp, has_cp = optional_import("cupy")
-cp_ndarray, _ = optional_import("cupy", name="ndarray")
+
 
 __all__ = [
     "Identity",
@@ -321,8 +330,6 @@ def __call__(self, img: NdarrayOrTensor, dtype: Optional[Union[DtypeLike, torch.
             TypeError: When ``img`` type is not in ``Union[numpy.ndarray, torch.Tensor]``.
 
         """
-        if not isinstance(img, (torch.Tensor, np.ndarray)):
-            raise TypeError(f"img must be one of (numpy.ndarray, torch.Tensor) but is {type(img).__name__}.")
         img_out, *_ = convert_data_type(img, output_type=type(img), dtype=dtype or self.dtype)
         return img_out
 
@@ -334,11 +341,16 @@ class ToTensor(Transform):
 
     backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
 
+    def __init__(self, dtype: Optional[torch.dtype] = None, device: Optional[torch.device] = None) -> None:
+        super().__init__()
+        self.dtype = dtype
+        self.device = device
+
     def __call__(self, img: NdarrayOrTensor) -> torch.Tensor:
         """
         Apply the transform to `img` and make it contiguous.
         """
-        return convert_to_tensor(img, wrap_sequence=True)  # type: ignore
+        return convert_to_tensor(img, dtype=self.dtype, device=self.device, wrap_sequence=True)  # type: ignore
 
 
 class EnsureType(Transform):
@@ -350,19 +362,24 @@ class EnsureType(Transform):
 
     Args:
         data_type: target data type to convert, should be "tensor" or "numpy".
+        dtype: target data content type to convert, for example: np.float32, torch.float, etc.
+        device: for Tensor data type, specify the target device.
 
     """
 
     backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
 
-    def __init__(self, data_type: str = "tensor") -> None:
-        data_type = data_type.lower()
-        if data_type not in ("tensor", "numpy"):
-            raise ValueError("`data type` must be 'tensor' or 'numpy'.")
-
-        self.data_type = data_type
+    def __init__(
+        self,
+        data_type: str = "tensor",
+        dtype: Optional[Union[DtypeLike, torch.dtype]] = None,
+        device: Optional[torch.device] = None,
+    ) -> None:
+        self.data_type = look_up_option(data_type.lower(), {"tensor", "numpy"})
+        self.dtype = dtype
+        self.device = device
 
-    def __call__(self, data: NdarrayOrTensor) -> NdarrayOrTensor:
+    def __call__(self, data: NdarrayOrTensor):
         """
         Args:
             data: input data can be PyTorch Tensor, numpy array, list, dictionary, int, float, bool, str, etc.
@@ -371,7 +388,11 @@ def __call__(self, data: NdarrayOrTensor) -> NdarrayOrTensor:
                 if applicable.
 
         """
-        return convert_to_tensor(data) if self.data_type == "tensor" else convert_to_numpy(data)  # type: ignore
+        if self.data_type == "tensor":
+            dtype_ = get_equivalent_dtype(self.dtype, torch.Tensor)
+            return convert_to_tensor(data, dtype=dtype_, device=self.device)
+        dtype_ = get_equivalent_dtype(self.dtype, np.ndarray)
+        return convert_to_numpy(data, dtype=dtype_)
 
 
 class ToNumpy(Transform):
@@ -381,27 +402,36 @@ class ToNumpy(Transform):
 
     backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
 
+    def __init__(self, dtype: Optional[DtypeLike] = None) -> None:
+        super().__init__()
+        self.dtype = dtype
+
     def __call__(self, img: NdarrayOrTensor) -> np.ndarray:
         """
         Apply the transform to `img` and make it contiguous.
         """
-        return convert_to_numpy(img)  # type: ignore
+        return convert_to_numpy(img, dtype=self.dtype)  # type: ignore
 
 
 class ToCupy(Transform):
     """
     Converts the input data to CuPy array, can support list or tuple of numbers, NumPy and PyTorch Tensor.
+
+    Args:
+        dtype: data type specifier. It is inferred from the input by default.
     """
 
     backend = [TransformBackends.TORCH, TransformBackends.NUMPY]
 
-    def __call__(self, img: NdarrayOrTensor) -> NdarrayOrTensor:
+    def __init__(self, dtype=None) -> None:
+        super().__init__()
+        self.dtype = dtype
+
+    def __call__(self, data: NdarrayOrTensor):
         """
-        Apply the transform to `img` and make it contiguous.
+        Create a CuPy array from `data` and make it contiguous
         """
-        if isinstance(img, torch.Tensor):
-            img = img.detach().cpu().numpy()
-        return cp.ascontiguousarray(cp.asarray(img))  # type: ignore
+        return convert_to_cupy(data, self.dtype)
 
 
 class ToPIL(Transform):
@@ -547,7 +577,7 @@ def __call__(
         lines = [f"{prefix or self.prefix} statistics:"]
 
         if self.data_type if data_type is None else data_type:
-            lines.append(f"Type: {type(img)}")
+            lines.append(f"Type: {type(img)} {img.dtype if hasattr(img, 'dtype') else None}")
         if self.data_shape if data_shape is None else data_shape:
             lines.append(f"Shape: {img.shape}")
         if self.value_range if value_range is None else value_range:
@@ -759,16 +789,18 @@ class FgBgToIndices(Transform):
 
     """
 
+    backend = [TransformBackends.NUMPY, TransformBackends.TORCH]
+
     def __init__(self, image_threshold: float = 0.0, output_shape: Optional[Sequence[int]] = None) -> None:
         self.image_threshold = image_threshold
         self.output_shape = output_shape
 
     def __call__(
         self,
-        label: np.ndarray,
-        image: Optional[np.ndarray] = None,
+        label: NdarrayOrTensor,
+        image: Optional[NdarrayOrTensor] = None,
         output_shape: Optional[Sequence[int]] = None,
-    ) -> Tuple[np.ndarray, np.ndarray]:
+    ) -> Tuple[NdarrayOrTensor, NdarrayOrTensor]:
         """
         Args:
             label: input data to compute foreground and background indices.
@@ -781,13 +813,15 @@ def __call__(
             output_shape = self.output_shape
         fg_indices, bg_indices = map_binary_to_indices(label, image, self.image_threshold)
         if output_shape is not None:
-            fg_indices = np.stack([np.unravel_index(i, output_shape) for i in fg_indices])
-            bg_indices = np.stack([np.unravel_index(i, output_shape) for i in bg_indices])
-
+            fg_indices = unravel_indices(fg_indices, output_shape)
+            bg_indices = unravel_indices(bg_indices, output_shape)
         return fg_indices, bg_indices
 
 
 class ClassesToIndices(Transform):
+
+    backend = [TransformBackends.NUMPY, TransformBackends.TORCH]
+
     def __init__(
         self,
         num_classes: Optional[int] = None,
@@ -814,10 +848,10 @@ def __init__(
 
     def __call__(
         self,
-        label: np.ndarray,
-        image: Optional[np.ndarray] = None,
+        label: NdarrayOrTensor,
+        image: Optional[NdarrayOrTensor] = None,
         output_shape: Optional[Sequence[int]] = None,
-    ) -> List[np.ndarray]:
+    ) -> List[NdarrayOrTensor]:
         """
         Args:
             label: input data to compute the indices of every class.
@@ -826,11 +860,13 @@ def __call__(
             output_shape: expected shape of output indices. if None, use `self.output_shape` instead.
 
         """
+
         if output_shape is None:
             output_shape = self.output_shape
+        indices: List[NdarrayOrTensor]
         indices = map_classes_to_indices(label, self.num_classes, image, self.image_threshold)
         if output_shape is not None:
-            indices = [np.stack([np.unravel_index(i, output_shape) for i in array]) for array in indices]
+            indices = [unravel_indices(cls_indices, output_shape) for cls_indices in indices]
 
         return indices
 
@@ -846,13 +882,12 @@ class ConvertToMultiChannelBasedOnBratsClasses(Transform):
     """
 
     def __call__(self, img: np.ndarray) -> np.ndarray:
+        img, *_ = convert_data_type(img, np.ndarray)  # type: ignore
         # if img has channel dim, squeeze it
         if img.ndim == 4 and img.shape[0] == 1:
             img = np.squeeze(img, axis=0)
 
-        result = []
-        # merge labels 1 (tumor non-enh) and 4 (tumor enh) to TC
-        result.append(np.logical_or(img == 1, img == 4))
+        result = [np.logical_or(img == 1, img == 4)]
         # merge labels 1 (tumor non-enh) and 4 (tumor enh) and 2 (large edema) to WT
         result.append(np.logical_or(np.logical_or(img == 1, img == 4), img == 2))
         # label 4 is ET
@@ -912,6 +947,9 @@ def __call__(
         if label.shape[0] != 1:
             raise ValueError("Only supports single channel labels!")
 
+        img, *_ = convert_data_type(img, np.ndarray)  # type: ignore
+        label, *_ = convert_data_type(label, np.ndarray)  # type: ignore
+
         # Generate extreme points
         self.randomize(label[0, :])
 
@@ -948,6 +986,7 @@ def __call__(self, img: torch.Tensor):
             img: PyTorch Tensor data for the TorchVision transform.
 
         """
+        img, *_ = convert_data_type(img, torch.Tensor)  # type: ignore
         return self.trans(img)
 
 
@@ -978,7 +1017,7 @@ def __init__(self, orig_labels: Sequence, target_labels: Sequence, dtype: DtypeL
         self.dtype = dtype
 
     def __call__(self, img: np.ndarray):
-        img = np.asarray(img)
+        img, *_ = convert_data_type(img, np.ndarray)  # type: ignore
         img_flat = img.flatten()
         try:
             out_flat = np.copy(img_flat).astype(self.dtype)
@@ -1034,6 +1073,7 @@ def __call__(
                 mask must have the same shape as input `img`.
 
         """
+        img, *_ = convert_data_type(img, np.ndarray)  # type: ignore
         if meta_data is None:
             meta_data = {}
 
@@ -1044,11 +1084,11 @@ def __call__(
             img_ = img[mask]
 
         supported_ops = {
-            "mean": lambda x: np.nanmean(x),
-            "median": lambda x: np.nanmedian(x),
-            "max": lambda x: np.nanmax(x),
-            "min": lambda x: np.nanmin(x),
-            "std": lambda x: np.nanstd(x),
+            "mean": np.nanmean,
+            "median": np.nanmedian,
+            "max": np.nanmax,
+            "min": np.nanmin,
+            "std": np.nanstd,
         }
 
         def _compute(op: Callable, data: np.ndarray):
@@ -1060,7 +1100,7 @@ def _compute(op: Callable, data: np.ndarray):
         for o in self.ops:
             if isinstance(o, str):
                 o = look_up_option(o, supported_ops.keys())
-                meta_data[self.key_prefix + "_" + o] = _compute(supported_ops[o], img_)
+                meta_data[self.key_prefix + "_" + o] = _compute(supported_ops[o], img_)  # type: ignore
             elif callable(o):
                 meta_data[self.key_prefix + "_custom_" + str(custom_index)] = _compute(o, img_)
                 custom_index += 1
@@ -1083,6 +1123,8 @@ class ToDevice(Transform):
 
     """
 
+    backend = [TransformBackends.TORCH]
+
     def __init__(self, device: Union[torch.device, str], **kwargs) -> None:
         """
         Args:
@@ -1099,3 +1141,78 @@ def __call__(self, img: torch.Tensor):
             raise ValueError("img must be PyTorch Tensor, consider converting img by `EnsureType` transform first.")
 
         return img.to(self.device, **self.kwargs)
+
+
+class CuCIM(Transform):
+    """
+    Wrap a non-randomized cuCIM transform, defined based on the transform name and args.
+    For randomized transforms (or randomly applying a transform) use :py:class:`monai.transforms.RandCuCIM`.
+
+    Args:
+        name: the transform name in CuCIM package
+        args: parameters for the CuCIM transform
+        kwargs: parameters for the CuCIM transform
+
+    Note:
+        CuCIM transform only work with CuPy arrays, so this transform expects input data to be `cupy.ndarray`.
+        Users can call `ToCuPy` transform to convert a numpy array or torch tensor to cupy array.
+    """
+
+    def __init__(self, name: str, *args, **kwargs) -> None:
+        super().__init__()
+        self.transform, _ = optional_import("cucim.core.operations.expose.transform", name=name)
+        self.args = args
+        self.kwargs = kwargs
+
+    def __call__(self, data):
+        """
+        Args:
+            data: a CuPy array (`cupy.ndarray`) for the cuCIM transform
+
+        Returns:
+            `cupy.ndarray`
+
+        """
+        return self.transform(data, *self.args, **self.kwargs)
+
+
+class RandCuCIM(CuCIM, RandomizableTransform):
+    """
+    Wrap a randomized cuCIM transform, defined based on the transform name and args,
+    or randomly apply a non-randomized transform.
+    For deterministic non-randomized transforms use :py:class:`monai.transforms.CuCIM`.
+
+    Args:
+        name: the transform name in CuCIM package.
+        apply_prob: the probability to apply the transform (default=1.0)
+        args: parameters for the CuCIM transform.
+        kwargs: parameters for the CuCIM transform.
+
+    Note:
+        - CuCIM transform only work with CuPy arrays, so this transform expects input data to be `cupy.ndarray`.
+          Users can call `ToCuPy` transform to convert a numpy array or torch tensor to cupy array.
+        - If the cuCIM transform is already randomized the `apply_prob` argument has nothing to do with
+          the randomness of the underlying cuCIM transform. `apply_prob` defines if the transform (either randomized
+          or non-randomized) being applied randomly, so it can apply non-randomized tranforms randomly but be careful
+          with setting `apply_prob` to anything than 1.0 when using along with cuCIM's randomized transforms.
+        - If the random factor of the underlying cuCIM transform is not derived from `self.R`,
+          the results may not be deterministic. See Also: :py:class:`monai.transforms.Randomizable`.
+    """
+
+    def __init__(self, name: str, apply_prob: float = 1.0, *args, **kwargs) -> None:
+        CuCIM.__init__(self, name, *args, **kwargs)
+        RandomizableTransform.__init__(self, prob=apply_prob)
+
+    def __call__(self, data):
+        """
+        Args:
+            data: a CuPy array (`cupy.ndarray`) for the cuCIM transform
+
+        Returns:
+            `cupy.ndarray`
+
+        """
+        self.randomize(data)
+        if not self._do_transform:
+            return data
+        return super().__call__(data)
diff --git a/monai/transforms/utility/dictionary.py b/monai/transforms/utility/dictionary.py
index e9bcce93b0..cefb654698 100644
--- a/monai/transforms/utility/dictionary.py
+++ b/monai/transforms/utility/dictionary.py
@@ -15,8 +15,8 @@
 Class names are ended with 'd' to denote dictionary-based transforms.
 """
 
-import copy
 import logging
+import re
 from copy import deepcopy
 from typing import Any, Callable, Dict, Hashable, List, Mapping, Optional, Sequence, Tuple, Union
 
@@ -35,6 +35,7 @@
     CastToType,
     ClassesToIndices,
     ConvertToMultiChannelBasedOnBratsClasses,
+    CuCIM,
     DataStats,
     EnsureChannelFirst,
     EnsureType,
@@ -86,6 +87,9 @@
     "CopyItemsD",
     "CopyItemsDict",
     "CopyItemsd",
+    "CuCIMd",
+    "CuCIMD",
+    "CuCIMDict",
     "DataStatsD",
     "DataStatsDict",
     "DataStatsd",
@@ -116,6 +120,9 @@
     "MapLabelValueD",
     "MapLabelValueDict",
     "MapLabelValued",
+    "RandCuCIMd",
+    "RandCuCIMD",
+    "RandCuCIMDict",
     "RandLambdaD",
     "RandLambdaDict",
     "RandLambdad",
@@ -442,15 +449,23 @@ class ToTensord(MapTransform, InvertibleTransform):
 
     backend = ToTensor.backend
 
-    def __init__(self, keys: KeysCollection, allow_missing_keys: bool = False) -> None:
+    def __init__(
+        self,
+        keys: KeysCollection,
+        dtype: Optional[torch.dtype] = None,
+        device: Optional[torch.device] = None,
+        allow_missing_keys: bool = False,
+    ) -> None:
         """
         Args:
             keys: keys of the corresponding items to be transformed.
                 See also: :py:class:`monai.transforms.compose.MapTransform`
+            dtype: target data content type to convert, for example: torch.float, etc.
+            device: specify the target device to put the Tensor data.
             allow_missing_keys: don't raise exception if key is missing.
         """
         super().__init__(keys, allow_missing_keys)
-        self.converter = ToTensor()
+        self.converter = ToTensor(dtype=dtype, device=device)
 
     def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
@@ -486,16 +501,25 @@ class EnsureTyped(MapTransform, InvertibleTransform):
 
     backend = EnsureType.backend
 
-    def __init__(self, keys: KeysCollection, data_type: str = "tensor", allow_missing_keys: bool = False) -> None:
+    def __init__(
+        self,
+        keys: KeysCollection,
+        data_type: str = "tensor",
+        dtype: Optional[Union[DtypeLike, torch.dtype]] = None,
+        device: Optional[torch.device] = None,
+        allow_missing_keys: bool = False,
+    ) -> None:
         """
         Args:
             keys: keys of the corresponding items to be transformed.
                 See also: :py:class:`monai.transforms.compose.MapTransform`
             data_type: target data type to convert, should be "tensor" or "numpy".
+            dtype: target data content type to convert, for example: np.float32, torch.float, etc.
+            device: for Tensor data type, specify the target device.
             allow_missing_keys: don't raise exception if key is missing.
         """
         super().__init__(keys, allow_missing_keys)
-        self.converter = EnsureType(data_type=data_type)
+        self.converter = EnsureType(data_type=data_type, dtype=dtype, device=device)
 
     def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
@@ -522,15 +546,21 @@ class ToNumpyd(MapTransform):
 
     backend = ToNumpy.backend
 
-    def __init__(self, keys: KeysCollection, allow_missing_keys: bool = False) -> None:
+    def __init__(
+        self,
+        keys: KeysCollection,
+        dtype: Optional[DtypeLike] = None,
+        allow_missing_keys: bool = False,
+    ) -> None:
         """
         Args:
             keys: keys of the corresponding items to be transformed.
                 See also: :py:class:`monai.transforms.compose.MapTransform`
+            dtype: target data type when converting to numpy array.
             allow_missing_keys: don't raise exception if key is missing.
         """
         super().__init__(keys, allow_missing_keys)
-        self.converter = ToNumpy()
+        self.converter = ToNumpy(dtype=dtype)
 
     def __call__(self, data: Mapping[Hashable, Any]) -> Dict[Hashable, Any]:
         d = dict(data)
@@ -542,19 +572,19 @@ def __call__(self, data: Mapping[Hashable, Any]) -> Dict[Hashable, Any]:
 class ToCupyd(MapTransform):
     """
     Dictionary-based wrapper of :py:class:`monai.transforms.ToCupy`.
+
+    Args:
+        keys: keys of the corresponding items to be transformed.
+            See also: :py:class:`monai.transforms.compose.MapTransform`
+        dtype: data type specifier. It is inferred from the input by default.
+        allow_missing_keys: don't raise exception if key is missing.
     """
 
     backend = ToCupy.backend
 
-    def __init__(self, keys: KeysCollection, allow_missing_keys: bool = False) -> None:
-        """
-        Args:
-            keys: keys of the corresponding items to be transformed.
-                See also: :py:class:`monai.transforms.compose.MapTransform`
-            allow_missing_keys: don't raise exception if key is missing.
-        """
+    def __init__(self, keys: KeysCollection, dtype=None, allow_missing_keys: bool = False) -> None:
         super().__init__(keys, allow_missing_keys)
-        self.converter = ToCupy()
+        self.converter = ToCupy(dtype=dtype)
 
     def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
@@ -630,8 +660,38 @@ class DeleteItemsd(MapTransform):
     It will remove the key-values and copy the others to construct a new dictionary.
     """
 
+    def __init__(
+        self,
+        keys: KeysCollection,
+        sep: str = ".",
+        use_re: Union[Sequence[bool], bool] = False,
+    ) -> None:
+        """
+        Args:
+            keys: keys of the corresponding items to delete, can be "A{sep}B{sep}C"
+                to delete key `C` in nested dictionary, `C` can be regular expression.
+                See also: :py:class:`monai.transforms.compose.MapTransform`
+            sep: the separator tag to define nested dictionary keys, default to ".".
+            use_re: whether the specified key is a regular expression, it also can be
+                a list of bool values, map the to keys.
+        """
+        super().__init__(keys)
+        self.sep = sep
+        self.use_re = ensure_tuple_rep(use_re, len(self.keys))
+
     def __call__(self, data):
-        return {key: val for key, val in data.items() if key not in self.key_iterator(data)}
+        def _delete_item(keys, d, use_re: bool = False):
+            key = keys[0]
+            if len(keys) > 1:
+                d[key] = _delete_item(keys[1:], d[key], use_re)
+                return d
+            return {k: v for k, v in d.items() if (use_re and not re.search(key, k)) or (not use_re and k != key)}
+
+        d = dict(data)
+        for key, use_re in zip(self.keys, self.use_re):
+            d = _delete_item(key.split(self.sep), d, use_re)
+
+        return d
 
 
 class SelectItemsd(MapTransform):
@@ -641,8 +701,7 @@ class SelectItemsd(MapTransform):
     """
 
     def __call__(self, data):
-        result = {key: data[key] for key in self.key_iterator(data)}
-        return result
+        return {key: data[key] for key in self.key_iterator(data)}
 
 
 class SqueezeDimd(MapTransform):
@@ -825,7 +884,7 @@ def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, N
                 if isinstance(val, torch.Tensor):
                     d[new_key] = val.detach().clone()
                 else:
-                    d[new_key] = copy.deepcopy(val)
+                    d[new_key] = deepcopy(val)
         return d
 
 
@@ -1059,6 +1118,8 @@ class FgBgToIndicesd(MapTransform):
 
     """
 
+    backend = FgBgToIndices.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -1075,7 +1136,7 @@ def __init__(
         self.image_key = image_key
         self.converter = FgBgToIndices(image_threshold, output_shape)
 
-    def __call__(self, data: Mapping[Hashable, np.ndarray]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, NdarrayOrTensor]) -> Dict[Hashable, NdarrayOrTensor]:
         d = dict(data)
         image = d[self.image_key] if self.image_key else None
         for key in self.key_iterator(d):
@@ -1103,6 +1164,8 @@ class ClassesToIndicesd(MapTransform):
 
     """
 
+    backend = ClassesToIndices.backend
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -1118,7 +1181,7 @@ def __init__(
         self.image_key = image_key
         self.converter = ClassesToIndices(num_classes, image_threshold, output_shape)
 
-    def __call__(self, data: Mapping[Hashable, Any]) -> Dict[Hashable, np.ndarray]:
+    def __call__(self, data: Mapping[Hashable, Any]):
         d = dict(data)
         image = d[self.image_key] if self.image_key else None
         for key in self.key_iterator(d):
@@ -1401,6 +1464,8 @@ class ToDeviced(MapTransform):
     Dictionary-based wrapper of :py:class:`monai.transforms.ToDevice`.
     """
 
+    backend = [TransformBackends.TORCH]
+
     def __init__(
         self,
         keys: KeysCollection,
@@ -1427,6 +1492,99 @@ def __call__(self, data: Mapping[Hashable, torch.Tensor]) -> Dict[Hashable, torc
         return d
 
 
+class CuCIMd(MapTransform):
+    """
+    Dictionary-based wrapper of :py:class:`monai.transforms.CuCIM` for non-randomized transforms.
+    For randomized transforms of CuCIM use :py:class:`monai.transforms.RandCuCIMd`.
+
+    Args:
+        keys: keys of the corresponding items to be transformed.
+            See also: :py:class:`monai.transforms.compose.MapTransform`
+        name: The transform name in CuCIM package.
+        allow_missing_keys: don't raise exception if key is missing.
+        args: parameters for the CuCIM transform.
+        kwargs: parameters for the CuCIM transform.
+
+    Note:
+        CuCIM transforms only work with CuPy arrays, this transform expects input data to be `cupy.ndarray`.
+        Users can call `ToCuPy` transform to convert a numpy array or torch tensor to cupy array.
+    """
+
+    def __init__(
+        self,
+        keys: KeysCollection,
+        name: str,
+        allow_missing_keys: bool = False,
+        *args,
+        **kwargs,
+    ) -> None:
+        super().__init__(keys=keys, allow_missing_keys=allow_missing_keys)
+        self.trans = CuCIM(name, *args, **kwargs)
+
+    def __call__(self, data):
+        """
+        Args:
+            data: Dict[Hashable, `cupy.ndarray`]
+
+        Returns:
+            Dict[Hashable, `cupy.ndarray`]
+
+        """
+        d = dict(data)
+        for key in self.key_iterator(d):
+            d[key] = self.trans(d[key])
+        return d
+
+
+class RandCuCIMd(CuCIMd, RandomizableTransform):
+    """
+    Dictionary-based wrapper of :py:class:`monai.transforms.CuCIM` for randomized transforms.
+    For deterministic non-randomized transforms of CuCIM use :py:class:`monai.transforms.CuCIMd`.
+
+    Args:
+        keys: keys of the corresponding items to be transformed.
+            See also: :py:class:`monai.transforms.compose.MapTransform`
+        name: The transform name in CuCIM package.
+        apply_prob: the probability to apply the transform (default=1.0)
+        allow_missing_keys: don't raise exception if key is missing.
+        args: parameters for the CuCIM transform.
+        kwargs: parameters for the CuCIM transform.
+
+    Note:
+        - CuCIM transform only work with CuPy arrays, so this transform expects input data to be `cupy.ndarray`.
+          Users can call `ToCuPy` transform to convert a numpy array or torch tensor to cupy array.
+        - If the cuCIM transform is already randomized the `apply_prob` argument has nothing to do with
+          the randomness of the underlying cuCIM transform. `apply_prob` defines if the transform (either randomized
+          or non-randomized) being applied randomly, so it can apply non-randomized tranforms randomly but be careful
+          with setting `apply_prob` to anything than 1.0 when using along with cuCIM's randomized transforms.
+        - If the random factor of the underlying cuCIM transform is not derived from `self.R`,
+          the results may not be deterministic. See Also: :py:class:`monai.transforms.Randomizable`.
+    """
+
+    def __init__(
+        self,
+        apply_prob: float = 1.0,
+        *args,
+        **kwargs,
+    ) -> None:
+        CuCIMd.__init__(self, *args, **kwargs)
+        RandomizableTransform.__init__(self, prob=apply_prob)
+
+    def __call__(self, data):
+        """
+        Args:
+            data: Dict[Hashable, `cupy.ndarray`]
+
+        Returns:
+            Dict[Hashable, `cupy.ndarray`]
+
+        """
+        self.randomize(data)
+        if not self._do_transform:
+            return dict(data)
+        return super().__call__(data)
+
+
 IdentityD = IdentityDict = Identityd
 AsChannelFirstD = AsChannelFirstDict = AsChannelFirstd
 AsChannelLastD = AsChannelLastDict = AsChannelLastd
@@ -1463,3 +1621,5 @@ def __call__(self, data: Mapping[Hashable, torch.Tensor]) -> Dict[Hashable, torc
 MapLabelValueD = MapLabelValueDict = MapLabelValued
 IntensityStatsD = IntensityStatsDict = IntensityStatsd
 ToDeviceD = ToDeviceDict = ToDeviced
+CuCIMD = CuCIMDict = CuCIMd
+RandCuCIMD = RandCuCIMDict = RandCuCIMd
diff --git a/monai/transforms/utils.py b/monai/transforms/utils.py
index 30aa5e7b99..15543e91ef 100644
--- a/monai/transforms/utils.py
+++ b/monai/transforms/utils.py
@@ -20,26 +20,30 @@
 import torch
 
 import monai
-import monai.transforms.transform
 from monai.config import DtypeLike, IndexSelection
 from monai.config.type_definitions import NdarrayOrTensor
 from monai.networks.layers import GaussianFilter
 from monai.transforms.compose import Compose, OneOf
-from monai.transforms.transform import MapTransform, Transform
+from monai.transforms.transform import MapTransform, Transform, apply_transform
+from monai.transforms.utils_pytorch_numpy_unification import any_np_pt, nonzero, ravel, unravel_index, where
 from monai.utils import (
     GridSampleMode,
     InterpolateMode,
     InverseKeys,
+    NumpyPadMode,
+    PytorchPadMode,
+    deprecated_arg,
     ensure_tuple,
     ensure_tuple_rep,
     ensure_tuple_size,
     fall_back_tuple,
     issequenceiterable,
+    look_up_option,
     min_version,
     optional_import,
 )
 from monai.utils.enums import TransformBackends
-from monai.utils.type_conversion import convert_data_type
+from monai.utils.type_conversion import convert_data_type, convert_to_dst_type
 
 measure, _ = optional_import("skimage.measure", "0.14.2", min_version)
 ndimage, _ = optional_import("scipy.ndimage")
@@ -84,6 +88,7 @@
     "get_number_image_type_conversions",
     "get_transform_backends",
     "print_transform_backends",
+    "convert_pad_mode",
 ]
 
 
@@ -256,10 +261,10 @@ def resize_center(img: np.ndarray, *resize_dims: Optional[int], fill_value: floa
 
 
 def map_binary_to_indices(
-    label: np.ndarray,
-    image: Optional[np.ndarray] = None,
+    label: NdarrayOrTensor,
+    image: Optional[NdarrayOrTensor] = None,
     image_threshold: float = 0.0,
-) -> Tuple[np.ndarray, np.ndarray]:
+) -> Tuple[NdarrayOrTensor, NdarrayOrTensor]:
     """
     Compute the foreground and background of input label data, return the indices after fattening.
     For example:
@@ -272,28 +277,32 @@ def map_binary_to_indices(
             to define background. so the output items will not map to all the voxels in the label.
         image_threshold: if enabled `image`, use ``image > image_threshold`` to
             determine the valid image content area and select background only in this area.
-
     """
+
     # Prepare fg/bg indices
     if label.shape[0] > 1:
         label = label[1:]  # for One-Hot format data, remove the background channel
-    label_flat = np.any(label, axis=0).ravel()  # in case label has multiple dimensions
-    fg_indices = np.nonzero(label_flat)[0]
+    label_flat = ravel(any_np_pt(label, 0))  # in case label has multiple dimensions
+    fg_indices = nonzero(label_flat)
     if image is not None:
-        img_flat = np.any(image > image_threshold, axis=0).ravel()
-        bg_indices = np.nonzero(np.logical_and(img_flat, ~label_flat))[0]
+        img_flat = ravel(any_np_pt(image > image_threshold, 0))
+        img_flat, *_ = convert_to_dst_type(img_flat, label, dtype=img_flat.dtype)
+        bg_indices = nonzero(img_flat & ~label_flat)
     else:
-        bg_indices = np.nonzero(~label_flat)[0]
+        bg_indices = nonzero(~label_flat)
 
+    # no need to save the indices in GPU, otherwise, still need to move to CPU at runtime when crop by indices
+    fg_indices, *_ = convert_data_type(fg_indices, device=torch.device("cpu"))
+    bg_indices, *_ = convert_data_type(bg_indices, device=torch.device("cpu"))
     return fg_indices, bg_indices
 
 
 def map_classes_to_indices(
-    label: np.ndarray,
+    label: NdarrayOrTensor,
     num_classes: Optional[int] = None,
-    image: Optional[np.ndarray] = None,
+    image: Optional[NdarrayOrTensor] = None,
     image_threshold: float = 0.0,
-) -> List[np.ndarray]:
+) -> List[NdarrayOrTensor]:
     """
     Filter out indices of every class of the input label data, return the indices after fattening.
     It can handle both One-Hot format label and Argmax format label, must provide `num_classes` for
@@ -313,11 +322,11 @@ def map_classes_to_indices(
             determine the valid image content area and select class indices only in this area.
 
     """
-    img_flat: Optional[np.ndarray] = None
+    img_flat: Optional[NdarrayOrTensor] = None
     if image is not None:
-        img_flat = np.any(image > image_threshold, axis=0).ravel()
+        img_flat = ravel((image > image_threshold).any(0))
 
-    indices: List[np.ndarray] = []
+    indices: List[NdarrayOrTensor] = []
     # assuming the first dimension is channel
     channels = len(label)
 
@@ -328,9 +337,11 @@ def map_classes_to_indices(
         num_classes_ = num_classes
 
     for c in range(num_classes_):
-        label_flat = np.any(label[c : c + 1] if channels > 1 else label == c, axis=0).ravel()
-        label_flat = np.logical_and(img_flat, label_flat) if img_flat is not None else label_flat
-        indices.append(np.nonzero(label_flat)[0])
+        label_flat = ravel(any_np_pt(label[c : c + 1] if channels > 1 else label == c, 0))
+        label_flat = img_flat & label_flat if img_flat is not None else label_flat
+        # no need to save the indices in GPU, otherwise, still need to move to CPU at runtime when crop by indices
+        cls_indices, *_ = convert_data_type(nonzero(label_flat), device=torch.device("cpu"))
+        indices.append(cls_indices)
 
     return indices
 
@@ -380,13 +391,15 @@ def weighted_patch_samples(
 
 
 def correct_crop_centers(
-    centers: List[np.ndarray], spatial_size: Union[Sequence[int], int], label_spatial_shape: Sequence[int]
-) -> List[np.ndarray]:
+    centers: List[Union[int, torch.Tensor]],
+    spatial_size: Union[Sequence[int], int],
+    label_spatial_shape: Sequence[int],
+):
     """
     Utility to correct the crop center if the crop size is bigger than the image size.
 
     Args:
-        ceters: pre-computed crop centers, will correct based on the valid region.
+        centers: pre-computed crop centers of every dim, will correct based on the valid region.
         spatial_size: spatial size of the ROIs to be sampled.
         label_spatial_shape: spatial shape of the original label data to compare with ROI.
 
@@ -422,10 +435,10 @@ def generate_pos_neg_label_crop_centers(
     num_samples: int,
     pos_ratio: float,
     label_spatial_shape: Sequence[int],
-    fg_indices: np.ndarray,
-    bg_indices: np.ndarray,
+    fg_indices: NdarrayOrTensor,
+    bg_indices: NdarrayOrTensor,
     rand_state: Optional[np.random.RandomState] = None,
-) -> List[List[np.ndarray]]:
+) -> List[List[int]]:
     """
     Generate valid sample locations based on the label with option for specifying foreground ratio
     Valid: samples sitting entirely within image, expected input shape: [C, H, W, D] or [C, H, W]
@@ -448,11 +461,12 @@ def generate_pos_neg_label_crop_centers(
         rand_state = np.random.random.__self__  # type: ignore
 
     centers = []
-    fg_indices, bg_indices = np.asarray(fg_indices), np.asarray(bg_indices)
-    if fg_indices.size == 0 and bg_indices.size == 0:
+    fg_indices = np.asarray(fg_indices) if isinstance(fg_indices, Sequence) else fg_indices
+    bg_indices = np.asarray(bg_indices) if isinstance(bg_indices, Sequence) else bg_indices
+    if len(fg_indices) == 0 and len(bg_indices) == 0:
         raise ValueError("No sampling location available.")
 
-    if fg_indices.size == 0 or bg_indices.size == 0:
+    if len(fg_indices) == 0 or len(bg_indices) == 0:
         warnings.warn(
             f"N foreground {len(fg_indices)}, N  background {len(bg_indices)},"
             "unable to generate class balanced samples."
@@ -462,10 +476,10 @@ def generate_pos_neg_label_crop_centers(
     for _ in range(num_samples):
         indices_to_use = fg_indices if rand_state.rand() < pos_ratio else bg_indices
         random_int = rand_state.randint(len(indices_to_use))
-        center = np.unravel_index(indices_to_use[random_int], label_spatial_shape)
+        idx = indices_to_use[random_int]
+        center = unravel_index(idx, label_spatial_shape)
         # shift center to range of valid centers
-        center_ori = list(center)
-        centers.append(correct_crop_centers(center_ori, spatial_size, label_spatial_shape))
+        centers.append(correct_crop_centers(center, spatial_size, label_spatial_shape))
 
     return centers
 
@@ -474,10 +488,10 @@ def generate_label_classes_crop_centers(
     spatial_size: Union[Sequence[int], int],
     num_samples: int,
     label_spatial_shape: Sequence[int],
-    indices: List[np.ndarray],
+    indices: Sequence[NdarrayOrTensor],
     ratios: Optional[List[Union[float, int]]] = None,
     rand_state: Optional[np.random.RandomState] = None,
-) -> List[List[np.ndarray]]:
+) -> List[List[int]]:
     """
     Generate valid sample locations based on the specified ratios of label classes.
     Valid: samples sitting entirely within image, expected input shape: [C, H, W, D] or [C, H, W]
@@ -499,12 +513,10 @@ def generate_label_classes_crop_centers(
         raise ValueError("num_samples must be an int number and greater than 0.")
     ratios_: List[Union[float, int]] = ([1] * len(indices)) if ratios is None else ratios
     if len(ratios_) != len(indices):
-        raise ValueError("random crop radios must match the number of indices of classes.")
+        raise ValueError("random crop ratios must match the number of indices of classes.")
     if any(i < 0 for i in ratios_):
         raise ValueError("ratios should not contain negative number.")
 
-    # ensure indices are numpy array
-    indices = [np.asarray(i) for i in indices]
     for i, array in enumerate(indices):
         if len(array) == 0:
             warnings.warn(f"no available indices of class {i} to crop, set the crop ratio of this class to zero.")
@@ -516,7 +528,7 @@ def generate_label_classes_crop_centers(
         # randomly select the indices of a class based on the ratios
         indices_to_use = indices[i]
         random_int = rand_state.randint(len(indices_to_use))
-        center = np.unravel_index(indices_to_use[random_int], label_spatial_shape)
+        center = unravel_index(indices_to_use[random_int], label_spatial_shape)
         # shift center to range of valid centers
         center_ori = list(center)
         centers.append(correct_crop_centers(center_ori, spatial_size, label_spatial_shape))
@@ -528,7 +540,9 @@ def create_grid(
     spatial_size: Sequence[int],
     spacing: Optional[Sequence[float]] = None,
     homogeneous: bool = True,
-    dtype: DtypeLike = float,
+    dtype=float,
+    device: Optional[torch.device] = None,
+    backend=TransformBackends.NUMPY,
 ):
     """
     compute a `spatial_size` mesh.
@@ -538,6 +552,26 @@ def create_grid(
         spacing: same len as ``spatial_size``, defaults to 1.0 (dense grid).
         homogeneous: whether to make homogeneous coordinates.
         dtype: output grid data type.
+        device: device to compute and store the output (when the backend is "torch").
+        backend: APIs to use, ``numpy`` or ``torch``.
+
+    """
+    _backend = look_up_option(backend, TransformBackends)
+    if _backend == TransformBackends.NUMPY:
+        return _create_grid_numpy(spatial_size, spacing, homogeneous, dtype)
+    if _backend == TransformBackends.TORCH:
+        return _create_grid_torch(spatial_size, spacing, homogeneous, dtype, device)
+    raise ValueError(f"backend {backend} is not supported")
+
+
+def _create_grid_numpy(
+    spatial_size: Sequence[int],
+    spacing: Optional[Sequence[float]] = None,
+    homogeneous: bool = True,
+    dtype: DtypeLike = float,
+):
+    """
+    compute a `spatial_size` mesh with the numpy API.
     """
     spacing = spacing or tuple(1.0 for _ in spatial_size)
     ranges = [np.linspace(-(d - 1.0) / 2.0 * s, (d - 1.0) / 2.0 * s, int(d)) for d, s in zip(spatial_size, spacing)]
@@ -547,23 +581,58 @@ def create_grid(
     return np.concatenate([coords, np.ones_like(coords[:1])])
 
 
+def _create_grid_torch(
+    spatial_size: Sequence[int],
+    spacing: Optional[Sequence[float]] = None,
+    homogeneous: bool = True,
+    dtype=torch.float32,
+    device: Optional[torch.device] = None,
+):
+    """
+    compute a `spatial_size` mesh with the torch API.
+    """
+    spacing = spacing or tuple(1.0 for _ in spatial_size)
+    ranges = [
+        torch.linspace(-(d - 1.0) / 2.0 * s, (d - 1.0) / 2.0 * s, int(d), device=device, dtype=dtype)
+        for d, s in zip(spatial_size, spacing)
+    ]
+    coords = torch.meshgrid(*ranges)
+    if not homogeneous:
+        return torch.stack(coords)
+    return torch.stack([*coords, torch.ones_like(coords[0])])
+
+
 def create_control_grid(
-    spatial_shape: Sequence[int], spacing: Sequence[float], homogeneous: bool = True, dtype: DtypeLike = float
+    spatial_shape: Sequence[int],
+    spacing: Sequence[float],
+    homogeneous: bool = True,
+    dtype: DtypeLike = float,
+    device: Optional[torch.device] = None,
+    backend=TransformBackends.NUMPY,
 ):
     """
     control grid with two additional point in each direction
     """
+    torch_backend = look_up_option(backend, TransformBackends) == TransformBackends.TORCH
+    ceil_func: Callable = torch.ceil if torch_backend else np.ceil  # type: ignore
     grid_shape = []
     for d, s in zip(spatial_shape, spacing):
-        d = int(d)
+        d = torch.as_tensor(d, device=device) if torch_backend else int(d)  # type: ignore
         if d % 2 == 0:
-            grid_shape.append(np.ceil((d - 1.0) / (2.0 * s) + 0.5) * 2.0 + 2.0)
+            grid_shape.append(ceil_func((d - 1.0) / (2.0 * s) + 0.5) * 2.0 + 2.0)
         else:
-            grid_shape.append(np.ceil((d - 1.0) / (2.0 * s)) * 2.0 + 3.0)
-    return create_grid(grid_shape, spacing, homogeneous, dtype)
+            grid_shape.append(ceil_func((d - 1.0) / (2.0 * s)) * 2.0 + 3.0)
+    return create_grid(
+        spatial_size=grid_shape, spacing=spacing, homogeneous=homogeneous, dtype=dtype, device=device, backend=backend
+    )
 
 
-def create_rotate(spatial_dims: int, radians: Union[Sequence[float], float]) -> np.ndarray:
+def create_rotate(
+    spatial_dims: int,
+    radians: Union[Sequence[float], float],
+    device: Optional[torch.device] = None,
+    backend=TransformBackends.NUMPY,
+) -> NdarrayOrTensor:
     """
     create a 2D or 3D rotation matrix
 
@@ -572,48 +641,83 @@ def create_rotate(spatial_dims: int, radians: Union[Sequence[float], float]) ->
         radians: rotation radians
             when spatial_dims == 3, the `radians` sequence corresponds to
             rotation in the 1st, 2nd, and 3rd dim respectively.
+        device: device to compute and store the output (when the backend is "torch").
+        backend: APIs to use, ``numpy`` or ``torch``.
 
     Raises:
         ValueError: When ``radians`` is empty.
         ValueError: When ``spatial_dims`` is not one of [2, 3].
 
     """
+    _backend = look_up_option(backend, TransformBackends)
+    if _backend == TransformBackends.NUMPY:
+        return _create_rotate(
+            spatial_dims=spatial_dims, radians=radians, sin_func=np.sin, cos_func=np.cos, eye_func=np.eye
+        )
+    if _backend == TransformBackends.TORCH:
+        return _create_rotate(
+            spatial_dims=spatial_dims,
+            radians=radians,
+            sin_func=lambda th: torch.sin(torch.as_tensor(th, dtype=torch.float32, device=device)),
+            cos_func=lambda th: torch.cos(torch.as_tensor(th, dtype=torch.float32, device=device)),
+            eye_func=lambda rank: torch.eye(rank, device=device),
+        )
+    raise ValueError(f"backend {backend} is not supported")
+
+
+def _create_rotate(
+    spatial_dims: int,
+    radians: Union[Sequence[float], float],
+    sin_func: Callable = np.sin,
+    cos_func: Callable = np.cos,
+    eye_func: Callable = np.eye,
+) -> NdarrayOrTensor:
     radians = ensure_tuple(radians)
     if spatial_dims == 2:
         if len(radians) >= 1:
-            sin_, cos_ = np.sin(radians[0]), np.cos(radians[0])
-            return np.array([[cos_, -sin_, 0.0], [sin_, cos_, 0.0], [0.0, 0.0, 1.0]])
+            sin_, cos_ = sin_func(radians[0]), cos_func(radians[0])
+            out = eye_func(3)
+            out[0, 0], out[0, 1] = cos_, -sin_
+            out[1, 0], out[1, 1] = sin_, cos_
+            return out  # type: ignore
         raise ValueError("radians must be non empty.")
 
     if spatial_dims == 3:
         affine = None
         if len(radians) >= 1:
-            sin_, cos_ = np.sin(radians[0]), np.cos(radians[0])
-            affine = np.array(
-                [[1.0, 0.0, 0.0, 0.0], [0.0, cos_, -sin_, 0.0], [0.0, sin_, cos_, 0.0], [0.0, 0.0, 0.0, 1.0]]
-            )
+            sin_, cos_ = sin_func(radians[0]), cos_func(radians[0])
+            affine = eye_func(4)
+            affine[1, 1], affine[1, 2] = cos_, -sin_
+            affine[2, 1], affine[2, 2] = sin_, cos_
         if len(radians) >= 2:
-            sin_, cos_ = np.sin(radians[1]), np.cos(radians[1])
+            sin_, cos_ = sin_func(radians[1]), cos_func(radians[1])
             if affine is None:
                 raise ValueError("Affine should be a matrix.")
-            affine = affine @ np.array(
-                [[cos_, 0.0, sin_, 0.0], [0.0, 1.0, 0.0, 0.0], [-sin_, 0.0, cos_, 0.0], [0.0, 0.0, 0.0, 1.0]]
-            )
+            _affine = eye_func(4)
+            _affine[0, 0], _affine[0, 2] = cos_, sin_
+            _affine[2, 0], _affine[2, 2] = -sin_, cos_
+            affine = affine @ _affine
         if len(radians) >= 3:
-            sin_, cos_ = np.sin(radians[2]), np.cos(radians[2])
+            sin_, cos_ = sin_func(radians[2]), cos_func(radians[2])
             if affine is None:
                 raise ValueError("Affine should be a matrix.")
-            affine = affine @ np.array(
-                [[cos_, -sin_, 0.0, 0.0], [sin_, cos_, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 0.0, 0.0, 1.0]]
-            )
+            _affine = eye_func(4)
+            _affine[0, 0], _affine[0, 1] = cos_, -sin_
+            _affine[1, 0], _affine[1, 1] = sin_, cos_
+            affine = affine @ _affine
         if affine is None:
             raise ValueError("radians must be non empty.")
-        return affine
+        return affine  # type: ignore
 
     raise ValueError(f"Unsupported spatial_dims: {spatial_dims}, available options are [2, 3].")
 
 
-def create_shear(spatial_dims: int, coefs: Union[Sequence[float], float]) -> np.ndarray:
+def create_shear(
+    spatial_dims: int,
+    coefs: Union[Sequence[float], float],
+    device: Optional[torch.device] = None,
+    backend=TransformBackends.NUMPY,
+) -> NdarrayOrTensor:
     """
     create a shearing matrix
 
@@ -629,55 +733,113 @@ def create_shear(spatial_dims: int, coefs: Union[Sequence[float], float]) -> np.
                     [0.0, 0.0, 0.0, 1.0],
                 ]
 
+        device: device to compute and store the output (when the backend is "torch").
+        backend: APIs to use, ``numpy`` or ``torch``.
+
     Raises:
         NotImplementedError: When ``spatial_dims`` is not one of [2, 3].
 
     """
+    _backend = look_up_option(backend, TransformBackends)
+    if _backend == TransformBackends.NUMPY:
+        return _create_shear(spatial_dims=spatial_dims, coefs=coefs, eye_func=np.eye)
+    if _backend == TransformBackends.TORCH:
+        return _create_shear(
+            spatial_dims=spatial_dims, coefs=coefs, eye_func=lambda rank: torch.eye(rank, device=device)
+        )
+    raise ValueError(f"backend {backend} is not supported")
+
+
+def _create_shear(spatial_dims: int, coefs: Union[Sequence[float], float], eye_func=np.eye) -> NdarrayOrTensor:
     if spatial_dims == 2:
         coefs = ensure_tuple_size(coefs, dim=2, pad_val=0.0)
-        return np.array([[1, coefs[0], 0.0], [coefs[1], 1.0, 0.0], [0.0, 0.0, 1.0]])
+        out = eye_func(3)
+        out[0, 1], out[1, 0] = coefs[0], coefs[1]
+        return out  # type: ignore
     if spatial_dims == 3:
         coefs = ensure_tuple_size(coefs, dim=6, pad_val=0.0)
-        return np.array(
-            [
-                [1.0, coefs[0], coefs[1], 0.0],
-                [coefs[2], 1.0, coefs[3], 0.0],
-                [coefs[4], coefs[5], 1.0, 0.0],
-                [0.0, 0.0, 0.0, 1.0],
-            ]
-        )
+        out = eye_func(4)
+        out[0, 1], out[0, 2] = coefs[0], coefs[1]
+        out[1, 0], out[1, 2] = coefs[2], coefs[3]
+        out[2, 0], out[2, 1] = coefs[4], coefs[5]
+        return out  # type: ignore
     raise NotImplementedError("Currently only spatial_dims in [2, 3] are supported.")
 
 
-def create_scale(spatial_dims: int, scaling_factor: Union[Sequence[float], float]):
+def create_scale(
+    spatial_dims: int,
+    scaling_factor: Union[Sequence[float], float],
+    device: Optional[torch.device] = None,
+    backend=TransformBackends.NUMPY,
+) -> NdarrayOrTensor:
     """
     create a scaling matrix
 
     Args:
         spatial_dims: spatial rank
         scaling_factor: scaling factors for every spatial dim, defaults to 1.
-    """
+        device: device to compute and store the output (when the backend is "torch").
+        backend: APIs to use, ``numpy`` or ``torch``.
+    """
+    _backend = look_up_option(backend, TransformBackends)
+    if _backend == TransformBackends.NUMPY:
+        return _create_scale(spatial_dims=spatial_dims, scaling_factor=scaling_factor, array_func=np.diag)
+    if _backend == TransformBackends.TORCH:
+        return _create_scale(
+            spatial_dims=spatial_dims,
+            scaling_factor=scaling_factor,
+            array_func=lambda x: torch.diag(torch.as_tensor(x, device=device)),
+        )
+    raise ValueError(f"backend {backend} is not supported")
+
+
+def _create_scale(
+    spatial_dims: int, scaling_factor: Union[Sequence[float], float], array_func=np.diag
+) -> NdarrayOrTensor:
     scaling_factor = ensure_tuple_size(scaling_factor, dim=spatial_dims, pad_val=1.0)
-    return np.diag(scaling_factor[:spatial_dims] + (1.0,))
+    return array_func(scaling_factor[:spatial_dims] + (1.0,))  # type: ignore
 
 
-def create_translate(spatial_dims: int, shift: Union[Sequence[float], float]) -> np.ndarray:
+def create_translate(
+    spatial_dims: int,
+    shift: Union[Sequence[float], float],
+    device: Optional[torch.device] = None,
+    backend=TransformBackends.NUMPY,
+) -> NdarrayOrTensor:
     """
     create a translation matrix
 
     Args:
         spatial_dims: spatial rank
         shift: translate pixel/voxel for every spatial dim, defaults to 0.
-    """
+        device: device to compute and store the output (when the backend is "torch").
+        backend: APIs to use, ``numpy`` or ``torch``.
+    """
+    _backend = look_up_option(backend, TransformBackends)
+    if _backend == TransformBackends.NUMPY:
+        return _create_translate(spatial_dims=spatial_dims, shift=shift, eye_func=np.eye, array_func=np.asarray)
+    if _backend == TransformBackends.TORCH:
+        return _create_translate(
+            spatial_dims=spatial_dims,
+            shift=shift,
+            eye_func=lambda x: torch.eye(torch.as_tensor(x), device=device),  # type: ignore
+            array_func=lambda x: torch.as_tensor(x, device=device),  # type: ignore
+        )
+    raise ValueError(f"backend {backend} is not supported")
+
+
+def _create_translate(
+    spatial_dims: int, shift: Union[Sequence[float], float], eye_func=np.eye, array_func=np.asarray
+) -> NdarrayOrTensor:
     shift = ensure_tuple(shift)
-    affine = np.eye(spatial_dims + 1)
+    affine = eye_func(spatial_dims + 1)
     for i, a in enumerate(shift[:spatial_dims]):
         affine[i, spatial_dims] = a
-    return np.asarray(affine)
+    return array_func(affine)  # type: ignore
 
 
 def generate_spatial_bounding_box(
-    img: np.ndarray,
+    img: NdarrayOrTensor,
     select_fn: Callable = is_positive,
     channel_indices: Optional[IndexSelection] = None,
     margin: Union[Sequence[int], int] = 0,
@@ -702,7 +864,7 @@ def generate_spatial_bounding_box(
         margin: add margin value to spatial dims of the bounding box, if only 1 value provided, use it for all dims.
     """
     data = img[list(ensure_tuple(channel_indices))] if channel_indices is not None else img
-    data = np.any(select_fn(data), axis=0)
+    data = select_fn(data).any(0)
     ndim = len(data.shape)
     margin = ensure_tuple_rep(margin, ndim)
     for m in margin:
@@ -713,13 +875,18 @@ def generate_spatial_bounding_box(
     box_end = [0] * ndim
 
     for di, ax in enumerate(itertools.combinations(reversed(range(ndim)), ndim - 1)):
-        dt = data.any(axis=ax)
-        if not np.any(dt):
+        dt = data
+        if len(ax) != 0:
+            dt = any_np_pt(dt, ax)
+
+        if not dt.any():
             # if no foreground, return all zero bounding box coords
             return [0] * ndim, [0] * ndim
 
-        min_d = max(np.argmax(dt) - margin[di], 0)
-        max_d = max(data.shape[di] - max(np.argmax(dt[::-1]) - margin[di], 0), min_d + 1)
+        arg_max = where(dt == dt.max())[0]
+        min_d = max(arg_max[0] - margin[di], 0)
+        max_d = arg_max[-1] + margin[di] + 1
+
         box_start[di], box_end[di] = min_d, max_d
 
     return box_start, box_end
@@ -1041,8 +1208,8 @@ def compute_divisible_spatial_size(spatial_shape: Sequence[int], k: Union[Sequen
 
 
 def equalize_hist(
-    img: np.ndarray,
-    mask: Optional[np.ndarray] = None,
+    img: NdarrayOrTensor,
+    mask: Optional[NdarrayOrTensor] = None,
     num_bins: int = 256,
     min: int = 0,
     max: int = 255,
@@ -1064,8 +1231,14 @@ def equalize_hist(
         dtype: data type of the output, default to `float32`.
 
     """
-    orig_shape = img.shape
-    hist_img = img[np.array(mask, dtype=bool)] if mask is not None else img
+    img_np: np.ndarray
+    img_np, *_ = convert_data_type(img, np.ndarray)  # type: ignore
+    mask_np: Optional[np.ndarray] = None
+    if mask is not None:
+        mask_np, *_ = convert_data_type(mask, np.ndarray)  # type: ignore
+
+    orig_shape = img_np.shape
+    hist_img = img_np[np.array(mask_np, dtype=bool)] if mask_np is not None else img_np
     if has_skimage:
         hist, bins = exposure.histogram(hist_img.flatten(), num_bins)
     else:
@@ -1077,9 +1250,9 @@ def equalize_hist(
     cum = rescale_array(arr=cum, minv=min, maxv=max)
 
     # apply linear interpolation
-    img = np.interp(img.flatten(), bins, cum)
+    img_np = np.interp(img_np.flatten(), bins, cum)
 
-    return img.reshape(orig_shape).astype(dtype)
+    return img_np.reshape(orig_shape).astype(dtype)
 
 
 class Fourier:
@@ -1088,38 +1261,70 @@ class Fourier:
     """
 
     @staticmethod
-    def shift_fourier(x: torch.Tensor, n_dims: int) -> torch.Tensor:
+    @deprecated_arg(
+        name="n_dims", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead."
+    )
+    def shift_fourier(x: NdarrayOrTensor, spatial_dims: int, n_dims: Optional[int] = None) -> NdarrayOrTensor:
         """
         Applies fourier transform and shifts the zero-frequency component to the
         center of the spectrum. Only the spatial dimensions get transformed.
 
         Args:
             x: Image to transform.
-            n_dims: Number of spatial dimensions.
+            spatial_dims: Number of spatial dimensions.
+
+        .. deprecated:: 0.6.0
+            ``n_dims`` is deprecated, use ``spatial_dims`` instead.
+
         Returns
             k: K-space data.
         """
-        k: torch.Tensor = torch.fft.fftshift(
-            torch.fft.fftn(x, dim=tuple(range(-n_dims, 0))), dim=tuple(range(-n_dims, 0))
-        )
+        if n_dims is not None:
+            spatial_dims = n_dims
+        dims = tuple(range(-spatial_dims, 0))
+        k: NdarrayOrTensor
+        if isinstance(x, torch.Tensor):
+            if hasattr(torch.fft, "fftshift"):
+                k = torch.fft.fftshift(torch.fft.fftn(x, dim=dims), dim=dims)
+            else:
+                # if using old PyTorch, will convert to numpy array and return
+                k = np.fft.fftshift(np.fft.fftn(x.cpu().numpy(), axes=dims), axes=dims)
+        else:
+            k = np.fft.fftshift(np.fft.fftn(x, axes=dims), axes=dims)
         return k
 
     @staticmethod
-    def inv_shift_fourier(k: torch.Tensor, n_dims: int) -> torch.Tensor:
+    @deprecated_arg(
+        name="n_dims", new_name="spatial_dims", since="0.6", msg_suffix="Please use `spatial_dims` instead."
+    )
+    def inv_shift_fourier(k: NdarrayOrTensor, spatial_dims: int, n_dims: Optional[int] = None) -> NdarrayOrTensor:
         """
         Applies inverse shift and fourier transform. Only the spatial
         dimensions are transformed.
 
         Args:
             k: K-space data.
-            n_dims: Number of spatial dimensions.
+            spatial_dims: Number of spatial dimensions.
+
+        .. deprecated:: 0.6.0
+            ``n_dims`` is deprecated, use ``spatial_dims`` instead.
+
         Returns:
             x: Tensor in image space.
         """
-        x: torch.Tensor = torch.fft.ifftn(
-            torch.fft.ifftshift(k, dim=tuple(range(-n_dims, 0))), dim=tuple(range(-n_dims, 0))
-        ).real
-        return x
+        if n_dims is not None:
+            spatial_dims = n_dims
+        dims = tuple(range(-spatial_dims, 0))
+        out: NdarrayOrTensor
+        if isinstance(k, torch.Tensor):
+            if hasattr(torch.fft, "ifftshift"):
+                out = torch.fft.ifftn(torch.fft.ifftshift(k, dim=dims), dim=dims, norm="backward").real
+            else:
+                # if using old PyTorch, will convert to numpy array and return
+                out = np.fft.ifftn(np.fft.ifftshift(k.cpu().numpy(), axes=dims), axes=dims).real
+        else:
+            out = np.fft.ifftn(np.fft.ifftshift(k, axes=dims), axes=dims).real
+        return out
 
 
 def get_number_image_type_conversions(transform: Compose, test_data: Any, key: Optional[Hashable] = None) -> int:
@@ -1149,9 +1354,7 @@ def _get_data(obj, key):
         prev_data = _get_data(test_data, key)
         prev_type = type(prev_data)
         prev_device = prev_data.device if isinstance(prev_data, torch.Tensor) else None
-        test_data = monai.transforms.transform.apply_transform(
-            _transform, test_data, transform.map_items, transform.unpack_items
-        )
+        test_data = apply_transform(_transform, test_data, transform.map_items, transform.unpack_items)
         # every time the type or device changes, increment the counter
         curr_data = _get_data(test_data, key)
         curr_device = curr_data.device if isinstance(curr_data, torch.Tensor) else None
@@ -1178,20 +1381,29 @@ def get_transform_backends():
             continue
         unique_transforms.append(obj)
 
-        if isclass(obj) and issubclass(obj, Transform):
-            if n in [
-                "Transform",
+        if (
+            isclass(obj)
+            and issubclass(obj, Transform)
+            and n
+            not in [
+                "BatchInverseTransform",
+                "Compose",
+                "Decollated",
+                "InvertD",
                 "InvertibleTransform",
                 "Lambda",
                 "LambdaD",
-                "Compose",
-                "RandomizableTransform",
+                "MapTransform",
                 "OneOf",
-                "BatchInverseTransform",
-                "InverteD",
-            ]:
-                continue
-
+                "PadListDataCollate",
+                "RandLambda",
+                "RandLambdaD",
+                "RandTorchVisionD",
+                "RandomizableTransform",
+                "TorchVisionD",
+                "Transform",
+            ]
+        ):
             backends[n] = [
                 TransformBackends.TORCH in obj.backend,
                 TransformBackends.NUMPY in obj.backend,
@@ -1212,7 +1424,7 @@ def print_color(t, color):
         print(f"\033[{color}m{t}\033[00m")
 
     def print_table_column(name, torch, numpy, color=Colors.none):
-        print_color("{:<50} {:<8} {:<8}".format(name, torch, numpy), color)
+        print_color(f"{name:<50} {torch:<8} {numpy:<8}", color)
 
     backends = get_transform_backends()
     n_total = len(backends)
@@ -1240,5 +1452,30 @@ def print_table_column(name, torch, numpy, color=Colors.none):
     print_color(f"Number of uncategorised: {n_uncategorized}", Colors.red)
 
 
+def convert_pad_mode(dst: NdarrayOrTensor, mode: Union[NumpyPadMode, PytorchPadMode, str]):
+    """
+    Utility to convert padding mode between numpy array and PyTorch Tensor.
+
+    Args:
+        dst: target data to convert padding mode for, should be numpy array or PyTorch Tensor.
+        mode: current padding mode.
+
+    """
+    mode = mode.value if isinstance(mode, (NumpyPadMode, PytorchPadMode)) else mode
+    if isinstance(dst, torch.Tensor):
+        if mode == "wrap":
+            mode = "circular"
+        if mode == "edge":
+            mode = "replicate"
+        return look_up_option(mode, PytorchPadMode)
+    if isinstance(dst, np.ndarray):
+        if mode == "circular":
+            mode = "wrap"
+        if mode == "replicate":
+            mode = "edge"
+        return look_up_option(mode, NumpyPadMode)
+    raise ValueError(f"unsupported data type: {type(dst)}.")
+
+
 if __name__ == "__main__":
     print_transform_backends()
diff --git a/monai/transforms/utils_create_transform_ims.py b/monai/transforms/utils_create_transform_ims.py
new file mode 100644
index 0000000000..b842c41572
--- /dev/null
+++ b/monai/transforms/utils_create_transform_ims.py
@@ -0,0 +1,668 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import pathlib
+import tempfile
+import textwrap
+from copy import deepcopy
+from glob import glob
+from typing import TYPE_CHECKING, Callable
+
+import numpy as np
+import torch
+
+from monai.apps import download_and_extract
+from monai.transforms import (
+    AddChanneld,
+    Affine,
+    Affined,
+    AsDiscrete,
+    Compose,
+    Flip,
+    Flipd,
+    LoadImaged,
+    MapTransform,
+    Orientation,
+    Orientationd,
+    Rand3DElastic,
+    Rand3DElasticd,
+    RandFlip,
+    RandFlipd,
+    Randomizable,
+    RandRotate,
+    RandRotated,
+    RandZoom,
+    RandZoomd,
+    Rotate,
+    Rotate90,
+    Rotate90d,
+    Rotated,
+    ScaleIntensity,
+    ScaleIntensityd,
+    SpatialPadd,
+    Zoom,
+    Zoomd,
+)
+from monai.transforms.croppad.array import (
+    BorderPad,
+    CenterScaleCrop,
+    CenterSpatialCrop,
+    CropForeground,
+    DivisiblePad,
+    RandCropByLabelClasses,
+    RandCropByPosNegLabel,
+    RandScaleCrop,
+    RandSpatialCrop,
+    RandSpatialCropSamples,
+    RandWeightedCrop,
+    ResizeWithPadOrCrop,
+    SpatialCrop,
+    SpatialPad,
+)
+from monai.transforms.croppad.dictionary import (
+    BorderPadd,
+    CenterScaleCropd,
+    CenterSpatialCropd,
+    CropForegroundd,
+    DivisiblePadd,
+    RandCropByLabelClassesd,
+    RandCropByPosNegLabeld,
+    RandScaleCropd,
+    RandSpatialCropd,
+    RandSpatialCropSamplesd,
+    RandWeightedCropd,
+    ResizeWithPadOrCropd,
+    SpatialCropd,
+)
+from monai.transforms.intensity.array import (
+    AdjustContrast,
+    GaussianSharpen,
+    GaussianSmooth,
+    GibbsNoise,
+    HistogramNormalize,
+    KSpaceSpikeNoise,
+    MaskIntensity,
+    NormalizeIntensity,
+    RandAdjustContrast,
+    RandBiasField,
+    RandCoarseDropout,
+    RandCoarseShuffle,
+    RandGaussianNoise,
+    RandGaussianSharpen,
+    RandGaussianSmooth,
+    RandGibbsNoise,
+    RandHistogramShift,
+    RandKSpaceSpikeNoise,
+    RandScaleIntensity,
+    RandShiftIntensity,
+    RandStdShiftIntensity,
+    ScaleIntensityRange,
+    ScaleIntensityRangePercentiles,
+    ShiftIntensity,
+    StdShiftIntensity,
+    ThresholdIntensity,
+)
+from monai.transforms.intensity.dictionary import (
+    AdjustContrastd,
+    GaussianSharpend,
+    GaussianSmoothd,
+    GibbsNoised,
+    HistogramNormalized,
+    KSpaceSpikeNoised,
+    MaskIntensityd,
+    NormalizeIntensityd,
+    RandAdjustContrastd,
+    RandBiasFieldd,
+    RandCoarseDropoutd,
+    RandCoarseShuffled,
+    RandGaussianNoised,
+    RandGaussianSharpend,
+    RandGaussianSmoothd,
+    RandGibbsNoised,
+    RandHistogramShiftd,
+    RandKSpaceSpikeNoised,
+    RandScaleIntensityd,
+    RandShiftIntensityd,
+    RandStdShiftIntensityd,
+    ScaleIntensityRanged,
+    ScaleIntensityRangePercentilesd,
+    ShiftIntensityd,
+    StdShiftIntensityd,
+    ThresholdIntensityd,
+)
+from monai.transforms.post.array import LabelFilter, LabelToContour
+from monai.transforms.post.dictionary import AsDiscreted, LabelFilterd, LabelToContourd
+from monai.transforms.spatial.array import Rand2DElastic, RandAffine, RandAxisFlip, RandRotate90, Resize, Spacing
+from monai.transforms.spatial.dictionary import (
+    Rand2DElasticd,
+    RandAffined,
+    RandAxisFlipd,
+    RandRotate90d,
+    Resized,
+    Spacingd,
+)
+from monai.utils.enums import CommonKeys
+from monai.utils.module import optional_import
+
+if TYPE_CHECKING:
+    import matplotlib.pyplot as plt
+
+    has_matplotlib = True
+
+else:
+    plt, has_matplotlib = optional_import("matplotlib.pyplot")
+
+
+def get_data(keys):
+    """Get the example data to be used.
+
+    Use MarsAtlas as it only contains 1 image for quick download and
+    that image is parcellated.
+    """
+    cache_dir = os.environ.get("MONAI_DATA_DIRECTORY") or tempfile.mkdtemp()
+    fname = "MarsAtlas-MNI-Colin27.zip"
+    url = "https://www.dropbox.com/s/ndz8qtqblkciole/" + fname + "?dl=1"
+    out_path = os.path.join(cache_dir, "MarsAtlas-MNI-Colin27")
+    zip_path = os.path.join(cache_dir, fname)
+
+    download_and_extract(url, zip_path, out_path)
+
+    image, label = sorted(glob(os.path.join(out_path, "*.nii")))
+
+    data = {CommonKeys.IMAGE: image, CommonKeys.LABEL: label}
+
+    transforms = Compose(
+        [
+            LoadImaged(keys),
+            AddChanneld(keys),
+            ScaleIntensityd(CommonKeys.IMAGE),
+            Rotate90d(keys, spatial_axes=[0, 2]),
+        ]
+    )
+    data = transforms(data)
+    max_size = max(data[keys[0]].shape)
+    padder = SpatialPadd(keys, (max_size, max_size, max_size))
+    return padder(data)
+
+
+def update_docstring(code_path, transform_name):
+    """
+    Find the documentation for a given transform and if it's missing,
+    add a pointer to the transform's example image.
+    """
+    with open(code_path) as f:
+        contents = f.readlines()
+    doc_start = None
+    for i, line in enumerate(contents):
+        # find the line containing start of the transform documentation
+        if "`" + transform_name + "`" in line:
+            doc_start = i
+            break
+    if doc_start is None:
+        raise RuntimeError("Couldn't find transform documentation")
+
+    # if image is already in docs, nothing to do
+    image_line = doc_start + 2
+    if ".. image" in contents[image_line]:
+        return
+
+    # add the line for the image and the alt text
+    contents_orig = deepcopy(contents)
+    contents.insert(
+        image_line,
+        ".. image:: https://github.com/Project-MONAI/DocImages/raw/main/transforms/" + transform_name + ".png\n",
+    )
+    contents.insert(image_line + 1, "    :alt: example of " + transform_name + "\n")
+
+    # check that we've only added two lines
+    assert len(contents) == len(contents_orig) + 2
+
+    # write the updated doc to overwrite the original
+    with open(code_path, "w") as f:
+        f.writelines(contents)
+
+
+def pre_process_data(data, ndim, is_map, is_post):
+    """If transform requires 2D data, then convert to 2D"""
+    if ndim == 2:
+        for k in keys:
+            data[k] = data[k][..., data[k].shape[-1] // 2]
+    if is_post:
+        for k in keys:
+            data[k] = torch.as_tensor(data[k])
+
+    if is_map:
+        return data
+    return data[CommonKeys.LABEL] if is_post else data[CommonKeys.IMAGE]
+
+
+def get_2d_slice(image, view, is_label):
+    """If image is 3d, get the central slice. If is already 2d, return as-is.
+    If image is label, set 0 to np.nan.
+    """
+    if image.ndim == 2:
+        out = image
+    else:
+        shape = image.shape
+        slices = [slice(0, s) for s in shape]
+        _slice = shape[view] // 2
+        slices[view] = slice(_slice, _slice + 1)
+        slices = tuple(slices)
+        out = np.squeeze(image[slices], view)
+    if is_label:
+        out[out == 0] = np.nan
+    return out
+
+
+def get_stacked_2d_ims(im, is_label):
+    """Get the 3 orthogonal views and stack them into 1 image.
+    Requires that all images be same size, but this is taken care
+    of by the `SpatialPadd` earlier.
+    """
+    return [get_2d_slice(im, i, is_label) for i in range(3)]
+
+
+def get_stacked_before_after(before, after, is_label=False):
+    """Stack before and after images into 1 image if 3d.
+    Requires that before and after images be the same size.
+    """
+    return [get_stacked_2d_ims(d, is_label) for d in (before, after)]
+
+
+def save_image(images, labels, filename, transform_name, transform_args, shapes, colorbar=False):
+    """Save image to file, ensuring there's no whitespace around the edge."""
+    plt.rcParams.update({"font.family": "monospace"})
+    plt.style.use("dark_background")
+    nrow = len(images)  # before and after (should always be 2)
+    ncol = len(images[0])  # num orthogonal views (either 1 or 3)
+    # roughly estimate the height_ratios of the first:second row
+    hs = [float(r[0].shape[0]) for r in images]
+    fig = plt.figure(tight_layout=True)
+    spec = fig.add_gridspec(nrow, ncol, hspace=0, wspace=0, height_ratios=hs)
+    for row in range(nrow):
+        vmin = min(i.min() for i in images[row])
+        vmax = max(i.max() for i in images[row])
+        for col in range(ncol):
+            ax = fig.add_subplot(spec[row, col])
+            imshow = ax.imshow(images[row][col], cmap="gray", vmin=vmin, vmax=vmax)
+            ax.set_aspect("equal")
+            if colorbar and col == ncol - 1:
+                plt.colorbar(imshow, ax=ax)
+            if col == 0:
+                y_label = "After" if row else "Before"
+                y_label += ("\n" + shapes[row]) if shapes[0] != shapes[1] else ""
+                ax.set_ylabel(y_label)
+            # print yticks for the right most column
+            if col != ncol - 1 or colorbar:
+                ax.set_yticks([])
+            else:
+                ax.yaxis.tick_right()
+                for n, label in enumerate(ax.yaxis.get_ticklabels()):
+                    if n > 2:
+                        label.set_visible(False)
+            ax.set_xticks([])
+            ax.set_frame_on(False)
+            if labels is not None:
+                ax.imshow(labels[row][col], cmap="hsv", alpha=0.9, interpolation="nearest")
+    # title is e.g., Flipd(keys=keys, spatial_axis=0)
+    title = transform_name + "("
+    for k, v in transform_args.items():
+        title += k + "="
+        if isinstance(v, str):
+            title += "'" + v + "'"
+        elif isinstance(v, (np.ndarray, torch.Tensor)):
+            title += "[array]"
+        elif isinstance(v, Callable):
+            title += "[callable]"
+        else:
+            title += str(v)
+        title += ", "
+    if len(transform_args) > 0:
+        title = title[:-2]
+    title += ")"
+    # shorten the lines
+    title = textwrap.fill(title, 50, break_long_words=False, subsequent_indent=" " * (len(transform_name) + 1))
+    fig.suptitle(title, x=0.1, horizontalalignment="left")
+    fig.savefig(filename)
+    plt.close(fig)
+
+
+def get_images(data, is_label=False):
+    """Get image. If is dictionary, extract key. If is list, stack. If both dictionary and list, do both.
+    Also return the image size as string to be used im the imshow. If it's a list, return `N x (H,W,D)`.
+    """
+    # If not a list, convert
+    if not isinstance(data, list):
+        data = [data]
+    key = CommonKeys.LABEL if is_label else CommonKeys.IMAGE
+    is_map = isinstance(data[0], dict)
+    # length of the list will be equal to number of samples produced. This will be 1 except for transforms that
+    # produce `num_samples`.
+    data = [d[key] if is_map else d for d in data]
+    data = [d[0] for d in data]  # remove channel component
+
+    # for each sample, create a list of the orthogonal views. If image is 2d, length will be 1. If 3d, there
+    # will be three orthogonal views
+    num_samples = len(data)
+    num_orthog_views = 3 if data[0].ndim == 3 else 1
+    shape_str = (f"{num_samples} x " if num_samples > 1 else "") + str(data[0].shape)
+    for i in range(num_samples):
+        data[i] = [get_2d_slice(data[i], view, is_label) for view in range(num_orthog_views)]
+
+    out = []
+    if num_samples == 1:
+        out = data[0]
+    else:
+        # we might need to panel the images. this happens if a transform produces e.g. 4 output images.
+        # In this case, we create a 2-by-2 grid from them. Output will be a list containing n_orthog_views,
+        # each element being either the image (if num_samples is 1) or the panelled image.
+        nrows = int(np.floor(num_samples ** 0.5))
+        for view in range(num_orthog_views):
+            result = np.asarray([d[view] for d in data])
+            nindex, height, width = result.shape
+            ncols = nindex // nrows
+            # only implemented for square number of images (e.g. 4 images goes to a 2-by-2 panel)
+            if nindex != nrows * ncols:
+                raise NotImplementedError
+            # want result.shape = (height*nrows, width*ncols), have to be careful about striding
+            result = result.reshape(nrows, ncols, height, width).swapaxes(1, 2).reshape(height * nrows, width * ncols)
+            out.append(result)
+    return out, shape_str
+
+
+def create_transform_im(
+    transform, transform_args, data, ndim=3, colorbar=False, update_doc=True, out_dir=None, seed=0, is_post=False
+):
+    """Create an image with the before and after of the transform.
+    Also update the transform's documentation to point to this image."""
+
+    transform = transform(**transform_args)
+
+    if not has_matplotlib:
+        raise RuntimeError
+
+    if isinstance(transform, Randomizable):
+        # increment the seed for map transforms so they're different to the array versions.
+        seed = seed + 1 if isinstance(transform, MapTransform) else seed
+        transform.set_random_state(seed)
+
+    out_dir = os.environ.get("MONAI_DOC_IMAGES")
+    if out_dir is None:
+        raise RuntimeError(
+            "Please git clone https://github.com/Project-MONAI/DocImages"
+            + " and then set the environment variable `MONAI_DOC_IMAGES`"
+        )
+    out_dir = os.path.join(out_dir, "transforms")
+
+    # Path is transform name
+    transform_name = transform.__class__.__name__
+    out_fname = transform_name + ".png"
+    out_file = os.path.join(out_dir, out_fname)
+
+    is_map = isinstance(transform, MapTransform)
+    data_in = pre_process_data(deepcopy(data), ndim, is_map, is_post)
+
+    data_tr = transform(deepcopy(data_in))
+
+    images_before, before_shape = get_images(data_in)
+    images_after, after_shape = get_images(data_tr)
+    images = (images_before, images_after)
+    shapes = (before_shape, after_shape)
+
+    labels = None
+    if is_map:
+        labels_before, *_ = get_images(data_in, is_label=True)
+        labels_after, *_ = get_images(data_tr, is_label=True)
+        labels = (labels_before, labels_after)
+
+    save_image(images, labels, out_file, transform_name, transform_args, shapes, colorbar)
+
+    if update_doc:
+        base_dir = pathlib.Path(__file__).parent.parent.parent
+        rst_path = os.path.join(base_dir, "docs", "source", "transforms.rst")
+        update_docstring(rst_path, transform_name)
+
+
+if __name__ == "__main__":
+
+    keys = [CommonKeys.IMAGE, CommonKeys.LABEL]
+    data = get_data(keys)
+    create_transform_im(RandFlip, dict(prob=1, spatial_axis=1), data)
+    create_transform_im(RandFlipd, dict(keys=keys, prob=1, spatial_axis=2), data)
+    create_transform_im(Flip, dict(spatial_axis=1), data)
+    create_transform_im(Flipd, dict(keys=keys, spatial_axis=2), data)
+    create_transform_im(Flipd, dict(keys=keys, spatial_axis=2), data)
+    create_transform_im(Orientation, dict(axcodes="RPI", image_only=True), data)
+    create_transform_im(Orientationd, dict(keys=keys, axcodes="RPI"), data)
+    create_transform_im(
+        Rand3DElastic, dict(prob=1.0, sigma_range=(1, 2), magnitude_range=(0.5, 0.5), shear_range=(1, 1, 1)), data
+    )
+    create_transform_im(Affine, dict(shear_params=(0, 0.5, 0), image_only=True, padding_mode="zeros"), data)
+    create_transform_im(
+        Affined, dict(keys=keys, shear_params=(0, 0.5, 0), mode=["bilinear", "nearest"], padding_mode="zeros"), data
+    )
+    create_transform_im(RandAffine, dict(prob=1, shear_range=(0.5, 0.5), padding_mode="zeros"), data)
+    create_transform_im(
+        RandAffined,
+        dict(keys=keys, prob=1, shear_range=(0.5, 0.5), mode=["bilinear", "nearest"], padding_mode="zeros"),
+        data,
+    )
+    create_transform_im(
+        Rand3DElastic, dict(sigma_range=(5, 7), magnitude_range=(50, 150), prob=1, padding_mode="zeros"), data
+    )
+    create_transform_im(
+        Rand2DElastic, dict(prob=1, spacing=(20, 20), magnitude_range=(1, 2), padding_mode="zeros"), data, 2
+    )
+    create_transform_im(
+        Rand2DElasticd,
+        dict(
+            keys=keys,
+            prob=1,
+            spacing=(20, 20),
+            magnitude_range=(1, 2),
+            padding_mode="zeros",
+            mode=["bilinear", "nearest"],
+        ),
+        data,
+        2,
+    )
+    create_transform_im(
+        Rand3DElasticd,
+        dict(
+            keys=keys,
+            sigma_range=(5, 7),
+            magnitude_range=(50, 150),
+            prob=1,
+            padding_mode="zeros",
+            mode=["bilinear", "nearest"],
+        ),
+        data,
+    )
+    create_transform_im(Rotate90, dict(spatial_axes=(1, 2)), data)
+    create_transform_im(Rotate90d, dict(keys=keys, spatial_axes=(1, 2)), data)
+    create_transform_im(RandRotate90, dict(prob=1), data)
+    create_transform_im(RandRotate90d, dict(keys=keys, prob=1), data)
+    create_transform_im(Rotate, dict(angle=0.1), data)
+    create_transform_im(Rotated, dict(keys=keys, angle=0.1, mode=["bilinear", "nearest"]), data)
+    create_transform_im(RandRotate, dict(prob=1, range_x=[0.4, 0.4]), data)
+    create_transform_im(RandRotated, dict(keys=keys, prob=1, range_x=[0.4, 0.4], mode=["bilinear", "nearest"]), data)
+    create_transform_im(Zoom, dict(zoom=0.6), data)
+    create_transform_im(Zoomd, dict(keys=keys, zoom=1.3, mode=["area", "nearest"]), data)
+    create_transform_im(RandZoom, dict(prob=1, min_zoom=0.6, max_zoom=0.8), data)
+    create_transform_im(RandZoomd, dict(keys=keys, prob=1, min_zoom=1.3, max_zoom=1.5, mode=["area", "nearest"]), data)
+    create_transform_im(ScaleIntensity, dict(minv=0, maxv=10), data, colorbar=True)
+    create_transform_im(ScaleIntensityd, dict(keys=CommonKeys.IMAGE, minv=0, maxv=10), data, colorbar=True)
+    create_transform_im(RandScaleIntensity, dict(prob=1.0, factors=(5, 10)), data, colorbar=True)
+    create_transform_im(
+        RandScaleIntensityd, dict(keys=CommonKeys.IMAGE, prob=1.0, factors=(5, 10)), data, colorbar=True
+    )
+    create_transform_im(DivisiblePad, dict(k=64), data)
+    create_transform_im(DivisiblePadd, dict(keys=keys, k=64), data)
+    create_transform_im(CropForeground, dict(), data)
+    create_transform_im(CropForegroundd, dict(keys=keys, source_key=CommonKeys.IMAGE), data)
+    create_transform_im(RandGaussianNoise, dict(prob=1, mean=0, std=0.1), data)
+    create_transform_im(RandGaussianNoised, dict(keys=CommonKeys.IMAGE, prob=1, mean=0, std=0.1), data)
+    create_transform_im(KSpaceSpikeNoise, dict(loc=(100, 100, 100), k_intensity=13), data)
+    create_transform_im(KSpaceSpikeNoised, dict(keys=CommonKeys.IMAGE, loc=(100, 100, 100), k_intensity=13), data)
+    create_transform_im(RandKSpaceSpikeNoise, dict(prob=1, intensity_range=(10, 13)), data)
+    create_transform_im(
+        RandKSpaceSpikeNoised,
+        dict(
+            keys=CommonKeys.IMAGE,
+            global_prob=1,
+            prob=1,
+            common_sampling=True,
+            intensity_ranges={CommonKeys.IMAGE: (13, 15)},
+        ),
+        data,
+    )
+    create_transform_im(GibbsNoise, dict(alpha=0.8), data)
+    create_transform_im(GibbsNoised, dict(keys=CommonKeys.IMAGE, alpha=0.8), data)
+    create_transform_im(RandGibbsNoise, dict(prob=1.0, alpha=(0.6, 0.8)), data)
+    create_transform_im(RandGibbsNoised, dict(keys=CommonKeys.IMAGE, prob=1.0, alpha=(0.6, 0.8)), data)
+    create_transform_im(ShiftIntensity, dict(offset=1), data, colorbar=True)
+    create_transform_im(ShiftIntensityd, dict(keys=CommonKeys.IMAGE, offset=1), data, colorbar=True)
+    create_transform_im(RandShiftIntensity, dict(prob=1.0, offsets=(10, 20)), data, colorbar=True)
+    create_transform_im(
+        RandShiftIntensityd, dict(keys=CommonKeys.IMAGE, prob=1.0, offsets=(10, 20)), data, colorbar=True
+    )
+    create_transform_im(StdShiftIntensity, dict(factor=10), data, colorbar=True)
+    create_transform_im(StdShiftIntensityd, dict(keys=CommonKeys.IMAGE, factor=10), data, colorbar=True)
+    create_transform_im(RandStdShiftIntensity, dict(prob=1.0, factors=(5, 10)), data, colorbar=True)
+    create_transform_im(
+        RandStdShiftIntensityd, dict(keys=CommonKeys.IMAGE, prob=1.0, factors=(5, 10)), data, colorbar=True
+    )
+    create_transform_im(RandBiasField, dict(prob=1, coeff_range=(0.2, 0.3)), data)
+    create_transform_im(RandBiasFieldd, dict(keys=CommonKeys.IMAGE, prob=1, coeff_range=(0.2, 0.3)), data)
+    create_transform_im(NormalizeIntensity, dict(subtrahend=0, divisor=10), data, colorbar=True)
+    create_transform_im(NormalizeIntensityd, dict(keys=CommonKeys.IMAGE, subtrahend=0, divisor=10), data, colorbar=True)
+    create_transform_im(ThresholdIntensity, dict(threshold=0.4, above=False, cval=0.9), data, colorbar=True)
+    create_transform_im(
+        ThresholdIntensityd, dict(keys=CommonKeys.IMAGE, threshold=0.4, above=False, cval=0.9), data, colorbar=True
+    )
+    create_transform_im(ScaleIntensityRange, dict(a_min=0, a_max=1, b_min=1, b_max=10), data, colorbar=True)
+    create_transform_im(
+        ScaleIntensityRanged, dict(keys=CommonKeys.IMAGE, a_min=0, a_max=1, b_min=1, b_max=10), data, colorbar=True
+    )
+    create_transform_im(ScaleIntensityRangePercentiles, dict(lower=5, upper=95, b_min=1, b_max=10), data, colorbar=True)
+    create_transform_im(
+        ScaleIntensityRangePercentilesd,
+        dict(keys=CommonKeys.IMAGE, lower=5, upper=95, b_min=1, b_max=10),
+        data,
+        colorbar=True,
+    )
+    create_transform_im(AdjustContrast, dict(gamma=2), data, colorbar=True)
+    create_transform_im(AdjustContrastd, dict(keys=CommonKeys.IMAGE, gamma=2), data, colorbar=True)
+    create_transform_im(RandAdjustContrast, dict(prob=1, gamma=(1.5, 2)), data, colorbar=True)
+    create_transform_im(RandAdjustContrastd, dict(keys=CommonKeys.IMAGE, prob=1, gamma=(1.5, 2)), data, colorbar=True)
+    create_transform_im(MaskIntensity, dict(mask_data=data[CommonKeys.IMAGE], select_fn=lambda x: x > 0.3), data)
+    create_transform_im(
+        MaskIntensityd, dict(keys=CommonKeys.IMAGE, mask_key=CommonKeys.IMAGE, select_fn=lambda x: x > 0.3), data
+    )
+    create_transform_im(GaussianSmooth, dict(sigma=2), data)
+    create_transform_im(GaussianSmoothd, dict(keys=CommonKeys.IMAGE, sigma=2), data)
+    create_transform_im(RandGaussianSmooth, dict(prob=1.0, sigma_x=(1, 2)), data)
+    create_transform_im(RandGaussianSmoothd, dict(keys=CommonKeys.IMAGE, prob=1.0, sigma_x=(1, 2)), data)
+    create_transform_im(GaussianSharpen, dict(), GaussianSmoothd(CommonKeys.IMAGE, 2)(data))
+    create_transform_im(GaussianSharpend, dict(keys=CommonKeys.IMAGE), GaussianSmoothd(CommonKeys.IMAGE, 2)(data))
+    create_transform_im(RandGaussianSharpen, dict(prob=1), GaussianSmoothd(CommonKeys.IMAGE, 2)(data))
+    create_transform_im(
+        RandGaussianSharpend, dict(keys=CommonKeys.IMAGE, prob=1), GaussianSmoothd(CommonKeys.IMAGE, 2)(data)
+    )
+    create_transform_im(RandHistogramShift, dict(prob=1, num_control_points=3), data, colorbar=True)
+    create_transform_im(
+        RandHistogramShiftd, dict(keys=CommonKeys.IMAGE, prob=1, num_control_points=3), data, colorbar=True
+    )
+    create_transform_im(RandCoarseDropout, dict(prob=1, holes=200, spatial_size=20, fill_value=0), data)
+    create_transform_im(
+        RandCoarseDropoutd, dict(keys=CommonKeys.IMAGE, prob=1, holes=200, spatial_size=20, fill_value=0), data
+    )
+    create_transform_im(RandCoarseShuffle, dict(prob=1, holes=200, spatial_size=20), data)
+    create_transform_im(RandCoarseShuffled, dict(keys=CommonKeys.IMAGE, prob=1, holes=200, spatial_size=20), data)
+    create_transform_im(HistogramNormalize, dict(num_bins=10), data)
+    create_transform_im(HistogramNormalized, dict(keys=CommonKeys.IMAGE, num_bins=10), data)
+    create_transform_im(SpatialPad, dict(spatial_size=(300, 300, 300)), data)
+    create_transform_im(SpatialPadd, dict(keys=keys, spatial_size=(300, 300, 300)), data)
+    create_transform_im(BorderPad, dict(spatial_border=10), data)
+    create_transform_im(BorderPadd, dict(keys=keys, spatial_border=10), data)
+    create_transform_im(SpatialCrop, dict(roi_center=(75, 75, 75), roi_size=(100, 100, 100)), data)
+    create_transform_im(SpatialCropd, dict(keys=keys, roi_center=(75, 75, 75), roi_size=(100, 100, 100)), data)
+    create_transform_im(CenterSpatialCrop, dict(roi_size=(100, 100, 100)), data)
+    create_transform_im(CenterSpatialCropd, dict(keys=keys, roi_size=(100, 100, 100)), data)
+    create_transform_im(RandSpatialCrop, dict(roi_size=(100, 100, 100), random_size=False), data)
+    create_transform_im(RandSpatialCropd, dict(keys=keys, roi_size=(100, 100, 100), random_size=False), data)
+    create_transform_im(RandSpatialCropSamples, dict(num_samples=4, roi_size=(100, 100, 100), random_size=False), data)
+    create_transform_im(
+        RandSpatialCropSamplesd, dict(keys=keys, num_samples=4, roi_size=(100, 100, 100), random_size=False), data
+    )
+    create_transform_im(
+        RandWeightedCrop, dict(spatial_size=(100, 100, 100), num_samples=4, weight_map=data[CommonKeys.IMAGE] > 0), data
+    )
+    create_transform_im(
+        RandWeightedCropd, dict(keys=keys, spatial_size=(100, 100, 100), num_samples=4, w_key=CommonKeys.IMAGE), data
+    )
+    create_transform_im(
+        RandCropByPosNegLabel,
+        dict(spatial_size=(100, 100, 100), label=data[CommonKeys.LABEL], neg=0, num_samples=4),
+        data,
+    )
+    create_transform_im(
+        RandCropByPosNegLabeld,
+        dict(keys=keys, spatial_size=(100, 100, 100), label_key=CommonKeys.LABEL, neg=0, num_samples=4),
+        data,
+    )
+    create_transform_im(
+        RandCropByLabelClasses,
+        dict(
+            spatial_size=(100, 100, 100), label=data[CommonKeys.LABEL] > 0, num_classes=2, ratios=[0, 1], num_samples=4
+        ),
+        data,
+    )
+    create_transform_im(
+        RandCropByLabelClassesd,
+        dict(
+            keys=keys,
+            spatial_size=(100, 100, 100),
+            label_key=CommonKeys.LABEL,
+            num_classes=2,
+            ratios=[0, 1],
+            num_samples=4,
+        ),
+        data,
+    )
+    create_transform_im(ResizeWithPadOrCrop, dict(spatial_size=(100, 100, 100)), data)
+    create_transform_im(ResizeWithPadOrCropd, dict(keys=keys, spatial_size=(100, 100, 100)), data)
+    create_transform_im(RandScaleCrop, dict(roi_scale=0.4), data)
+    create_transform_im(RandScaleCropd, dict(keys=keys, roi_scale=0.4), data)
+    create_transform_im(CenterScaleCrop, dict(roi_scale=0.4), data)
+    create_transform_im(CenterScaleCropd, dict(keys=keys, roi_scale=0.4), data)
+    create_transform_im(
+        AsDiscrete, dict(num_classes=2, threshold_values=True, logit_thresh=10), data, is_post=True, colorbar=True
+    )
+    create_transform_im(
+        AsDiscreted,
+        dict(keys=CommonKeys.LABEL, num_classes=2, threshold_values=True, logit_thresh=10),
+        data,
+        is_post=True,
+    )
+    create_transform_im(LabelFilter, dict(applied_labels=(1, 2, 3, 4, 5, 6)), data, is_post=True)
+    create_transform_im(
+        LabelFilterd, dict(keys=CommonKeys.LABEL, applied_labels=(1, 2, 3, 4, 5, 6)), data, is_post=True
+    )
+    create_transform_im(LabelToContour, dict(), data, is_post=True)
+    create_transform_im(LabelToContourd, dict(keys=CommonKeys.LABEL), data, is_post=True)
+    create_transform_im(Spacing, dict(pixdim=(5, 5, 5), image_only=True), data)
+    create_transform_im(Spacingd, dict(keys=keys, pixdim=(5, 5, 5), mode=["bilinear", "nearest"]), data)
+    create_transform_im(RandAxisFlip, dict(prob=1), data)
+    create_transform_im(RandAxisFlipd, dict(keys=keys, prob=1), data)
+    create_transform_im(Resize, dict(spatial_size=(100, 100, 100)), data)
+    create_transform_im(Resized, dict(keys=keys, spatial_size=(100, 100, 100), mode=["area", "nearest"]), data)
diff --git a/monai/transforms/utils_pytorch_numpy_unification.py b/monai/transforms/utils_pytorch_numpy_unification.py
index 2eebe3eda3..52c5248f96 100644
--- a/monai/transforms/utils_pytorch_numpy_unification.py
+++ b/monai/transforms/utils_pytorch_numpy_unification.py
@@ -9,14 +9,27 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Sequence, Union
+
 import numpy as np
 import torch
 
 from monai.config.type_definitions import NdarrayOrTensor
+from monai.utils.misc import is_module_ver_at_least
 
 __all__ = [
     "moveaxis",
     "in1d",
+    "clip",
+    "percentile",
+    "where",
+    "nonzero",
+    "floor_divide",
+    "unravel_index",
+    "unravel_indices",
+    "ravel",
+    "any_np_pt",
+    "maximum",
 ]
 
 
@@ -50,3 +63,201 @@ def in1d(x, y):
     if isinstance(x, np.ndarray):
         return np.in1d(x, y)
     return (x[..., None] == torch.tensor(y, device=x.device)).any(-1).view(-1)
+
+
+def clip(a: NdarrayOrTensor, a_min, a_max) -> NdarrayOrTensor:
+    """`np.clip` with equivalent implementation for torch."""
+    result: NdarrayOrTensor
+    if isinstance(a, np.ndarray):
+        result = np.clip(a, a_min, a_max)
+    else:
+        result = torch.clip(a, a_min, a_max)
+    return result
+
+
+def percentile(x: NdarrayOrTensor, q) -> Union[NdarrayOrTensor, float, int]:
+    """`np.percentile` with equivalent implementation for torch.
+
+    Pytorch uses `quantile`, but this functionality is only available from v1.7.
+    For earlier methods, we calculate it ourselves. This doesn't do interpolation,
+    so is the equivalent of ``numpy.percentile(..., interpolation="nearest")``.
+
+    Args:
+        x: input data
+        q: percentile to compute (should in range 0 <= q <= 100)
+
+    Returns:
+        Resulting value (scalar)
+    """
+    if np.isscalar(q):
+        if not 0 <= q <= 100:
+            raise ValueError
+    elif any(q < 0) or any(q > 100):
+        raise ValueError
+    result: Union[NdarrayOrTensor, float, int]
+    if isinstance(x, np.ndarray):
+        result = np.percentile(x, q)
+    else:
+        q = torch.tensor(q, device=x.device)
+        if hasattr(torch, "quantile"):
+            result = torch.quantile(x, q / 100.0)
+        else:
+            # Note that ``kthvalue()`` works one-based, i.e., the first sorted value
+            # corresponds to k=1, not k=0. Thus, we need the `1 +`.
+            k = 1 + (0.01 * q * (x.numel() - 1)).round().int()
+            if k.numel() > 1:
+                r = [x.view(-1).kthvalue(int(_k)).values.item() for _k in k]
+                result = torch.tensor(r, device=x.device)
+            else:
+                result = x.view(-1).kthvalue(int(k)).values.item()
+
+    return result
+
+
+def where(condition: NdarrayOrTensor, x=None, y=None) -> NdarrayOrTensor:
+    """
+    Note that `torch.where` may convert y.dtype to x.dtype.
+    """
+    result: NdarrayOrTensor
+    if isinstance(condition, np.ndarray):
+        if x is not None:
+            result = np.where(condition, x, y)
+        else:
+            result = np.where(condition)
+    else:
+        if x is not None:
+            x = torch.as_tensor(x, device=condition.device)
+            y = torch.as_tensor(y, device=condition.device, dtype=x.dtype)
+            result = torch.where(condition, x, y)
+        else:
+            result = torch.where(condition)  # type: ignore
+    return result
+
+
+def nonzero(x: NdarrayOrTensor):
+    """`np.nonzero` with equivalent implementation for torch.
+
+    Args:
+        idx: array/tensor
+
+    Returns:
+        Index unravelled for given shape
+    """
+    if isinstance(x, np.ndarray):
+        return np.nonzero(x)[0]
+    return torch.nonzero(x).flatten()
+
+
+def floor_divide(a: NdarrayOrTensor, b) -> NdarrayOrTensor:
+    """`np.floor_divide` with equivalent implementation for torch.
+
+    As of pt1.8, use `torch.div(..., rounding_mode="floor")`, and
+    before that, use `torch.floor_divide`.
+
+    Args:
+        a: first array/tensor
+        b: scalar to divide by
+
+    Returns:
+        Element-wise floor division between two arrays/tensors.
+    """
+    if isinstance(a, torch.Tensor):
+        if is_module_ver_at_least(torch, (1, 8, 0)):
+            return torch.div(a, b, rounding_mode="floor")
+        return torch.floor_divide(a, b)
+    return np.floor_divide(a, b)
+
+
+def unravel_index(idx, shape):
+    """`np.unravel_index` with equivalent implementation for torch.
+
+    Args:
+        idx: index to unravel
+        shape: shape of array/tensor
+
+    Returns:
+        Index unravelled for given shape
+    """
+    if isinstance(idx, torch.Tensor):
+        coord = []
+        for dim in reversed(shape):
+            coord.append(idx % dim)
+            idx = floor_divide(idx, dim)
+        return torch.stack(coord[::-1])
+    return np.asarray(np.unravel_index(idx, shape))
+
+
+def unravel_indices(idx, shape):
+    """Computing unravel cooridnates from indices.
+
+    Args:
+        idx: a sequence of indices to unravel
+        shape: shape of array/tensor
+
+    Returns:
+        Stacked indices unravelled for given shape
+    """
+    lib_stack = torch.stack if isinstance(idx[0], torch.Tensor) else np.stack
+    return lib_stack([unravel_index(i, shape) for i in idx])
+
+
+def ravel(x: NdarrayOrTensor):
+    """`np.ravel` with equivalent implementation for torch.
+
+    Args:
+        x: array/tensor to ravel
+
+    Returns:
+        Return a contiguous flattened array/tensor.
+    """
+    if isinstance(x, torch.Tensor):
+        if hasattr(torch, "ravel"):
+            return x.ravel()
+        return x.flatten().contiguous()
+    return np.ravel(x)
+
+
+def any_np_pt(x: NdarrayOrTensor, axis: Union[int, Sequence[int]]):
+    """`np.any` with equivalent implementation for torch.
+
+    For pytorch, convert to boolean for compatibility with older versions.
+
+    Args:
+        x: input array/tensor
+        axis: axis to perform `any` over
+
+    Returns:
+        Return a contiguous flattened array/tensor.
+    """
+    if isinstance(x, np.ndarray):
+        return np.any(x, axis)
+
+    # pytorch can't handle multiple dimensions to `any` so loop across them
+    axis = [axis] if not isinstance(axis, Sequence) else axis
+    for ax in axis:
+        try:
+            x = torch.any(x, ax)
+        except RuntimeError:
+            # older versions of pytorch require the input to be cast to boolean
+            x = torch.any(x.bool(), ax)
+    return x
+
+
+def maximum(a: NdarrayOrTensor, b: NdarrayOrTensor) -> NdarrayOrTensor:
+    """`np.maximum` with equivalent implementation for torch.
+
+    `torch.maximum` only available from pt>1.6, else use `torch.stack` and `torch.max`.
+
+    Args:
+        a: first array/tensor
+        b: second array/tensor
+
+    Returns:
+        Element-wise maximum between two arrays/tensors.
+    """
+    if isinstance(a, torch.Tensor) and isinstance(b, torch.Tensor):
+        # is torch and has torch.maximum (pt>1.6)
+        if hasattr(torch, "maximum"):
+            return torch.maximum(a, b)
+        return torch.stack((a, b)).max(dim=0)[0]
+    return np.maximum(a, b)
diff --git a/monai/utils/__init__.py b/monai/utils/__init__.py
index aa8f02f815..dc3922933d 100644
--- a/monai/utils/__init__.py
+++ b/monai/utils/__init__.py
@@ -77,6 +77,7 @@
 from .state_cacher import StateCacher
 from .type_conversion import (
     convert_data_type,
+    convert_to_cupy,
     convert_to_dst_type,
     convert_to_numpy,
     convert_to_tensor,
diff --git a/monai/utils/aliases.py b/monai/utils/aliases.py
index 2b7b29eeb5..a08dab4f95 100644
--- a/monai/utils/aliases.py
+++ b/monai/utils/aliases.py
@@ -70,8 +70,8 @@ def resolve_name(name):
         try:
             mod = importlib.import_module(modname)
             obj = getattr(mod, declname, None)
-        except ModuleNotFoundError:
-            raise ValueError(f"Module {modname!r} not found.")
+        except ModuleNotFoundError as not_found_err:
+            raise ValueError(f"Module {modname!r} not found.") from not_found_err
 
         if obj is None:
             raise ValueError(f"Module {modname!r} does not have member {declname!r}.")
diff --git a/monai/utils/deprecated.py b/monai/utils/deprecated.py
index 3a4568b06c..c66b60996b 100644
--- a/monai/utils/deprecated.py
+++ b/monai/utils/deprecated.py
@@ -60,6 +60,9 @@ def deprecated(
         Decorated definition which warns or raises exception when used
     """
 
+    # if version_val.startswith("0+"):
+    #     # version unknown, set version_val to a large value (assuming the latest version)
+    #     version_val = "100"
     if since is not None and removed is not None and not version_leq(since, removed):
         raise ValueError(f"since must be less or equal to removed, got since={since}, removed={removed}.")
     is_not_yet_deprecated = since is not None and version_val != since and version_leq(version_val, since)
@@ -116,6 +119,7 @@ def deprecated_arg(
     removed: Optional[str] = None,
     msg_suffix: str = "",
     version_val: str = __version__,
+    new_name: Optional[str] = None,
 ):
     """
     Marks a particular named argument of a callable as deprecated. The same conditions for `since` and `removed` as
@@ -130,6 +134,8 @@ def deprecated_arg(
     using the Sphinx directives such as `.. versionchanged:: version` and `.. deprecated:: version`.
     https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-versionadded
 
+    In the current implementation type annotations are not preserved.
+
 
     Args:
         name: name of position or keyword argument to mark as deprecated.
@@ -137,17 +143,21 @@ def deprecated_arg(
         removed: version at which the argument was removed and no longer usable.
         msg_suffix: message appended to warning/exception detailing reasons for deprecation and what to use instead.
         version_val: (used for testing) version to compare since and removed against, default is MONAI version.
+        new_name: name of position or keyword argument to replace the deprecated argument.
 
     Returns:
-        Decorated callable which warns or raises exception when deprecated argument used
+        Decorated callable which warns or raises exception when deprecated argument used.
     """
+
+    if version_val.startswith("0+") or not f"{version_val}".strip()[0].isdigit():
+        # version unknown, set version_val to a large value (assuming the latest version)
+        version_val = "100"
     if since is not None and removed is not None and not version_leq(since, removed):
         raise ValueError(f"since must be less or equal to removed, got since={since}, removed={removed}.")
     is_not_yet_deprecated = since is not None and version_val != since and version_leq(version_val, since)
     if is_not_yet_deprecated:
         # smaller than `since`, do nothing
         return lambda obj: obj
-
     if since is None and removed is None:
         # raise a DeprecatedError directly
         is_removed = True
@@ -157,9 +167,6 @@ def deprecated_arg(
         is_deprecated = since is not None and version_leq(since, version_val)
         is_removed = removed is not None and version_leq(removed, version_val)
 
-    if is_not_yet_deprecated:
-        return lambda obj: obj
-
     def _decorator(func):
         argname = f"{func.__name__}_{name}"
 
@@ -180,6 +187,9 @@ def _decorator(func):
 
         @wraps(func)
         def _wrapper(*args, **kwargs):
+            if new_name is not None and name in kwargs:
+                # replace the deprecated arg "name" with "new_name"
+                kwargs[new_name] = kwargs[name]
             binding = sig.bind(*args, **kwargs).arguments
 
             positional_found = name in binding
diff --git a/monai/utils/jupyter_utils.py b/monai/utils/jupyter_utils.py
index 26487083b1..f862452fb1 100644
--- a/monai/utils/jupyter_utils.py
+++ b/monai/utils/jupyter_utils.py
@@ -16,11 +16,14 @@
 
 from enum import Enum
 from threading import RLock, Thread
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
 
 import numpy as np
 import torch
 
+from monai.config import IgniteInfo
+from monai.utils.module import min_version, optional_import
+
 try:
     import matplotlib.pyplot as plt
 
@@ -28,14 +31,11 @@
 except ImportError:
     has_matplotlib = False
 
-try:
+if TYPE_CHECKING:
     from ignite.engine import Engine, Events
-
-    has_ignite = True
-except ImportError:
-    Engine = object
-    Events = object
-    has_ignite = False
+else:
+    Engine, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Engine")
+    Events, _ = optional_import("ignite.engine", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Events")
 
 LOSS_NAME = "loss"
 
@@ -128,7 +128,7 @@ def plot_metric_images(
         else:
             im.imshow(np.squeeze(imagemap[n]), cmap="gray")
 
-        im.set_title("%s\n%.3g -> %.3g" % (n, imagemap[n].min(), imagemap[n].max()))
+        im.set_title(f"{n}\n{imagemap[n].min():.3g} -> {imagemap[n].max():.3g}")
         im.axis("off")
         axes.append(im)
 
@@ -161,6 +161,7 @@ def plot_engine_status(
     window_fraction: int = 20,
     image_fn: Optional[Callable] = tensor_to_images,
     fig=None,
+    selected_inst: int = 0,
 ) -> Tuple:
     """
     Plot the status of the given Engine with its logger. The plot will consist of a graph of loss values and metrics
@@ -189,22 +190,36 @@ def plot_engine_status(
     graphmap = {LOSS_NAME: logger.loss}
     graphmap.update(logger.metrics)
 
-    imagemap = {}
+    imagemap: Dict = {}
     if image_fn is not None and engine.state is not None and engine.state.batch is not None:
         for src in (engine.state.batch, engine.state.output):
+            label = "Batch" if src is engine.state.batch else "Output"
+            batch_selected_inst = selected_inst  # selected batch index, set to 0 when src is decollated
+
+            # if the src object is a list of elements, ie. a decollated batch, select an element and keep it as
+            # a dictionary of tensors with a batch dimension added
             if isinstance(src, list):
-                for i, s in enumerate(src):
-                    if isinstance(s, dict):
-                        for k, v in s.items():
-                            if isinstance(v, torch.Tensor):
-                                image = image_fn(k, v)
-                                if image is not None:
-                                    imagemap[f"{k}_{i}"] = image
-                    elif isinstance(s, torch.Tensor):
-                        label = "Batch" if src is engine.state.batch else "Output"
-                        image = image_fn(label, s)
+                selected_dict = src[selected_inst]  # select this element
+                batch_selected_inst = 0  # set the selection to be the single index in the batch dimension
+                # store each tensor that is interpretable as an image with an added batch dimension
+                src = {k: v[None] for k, v in selected_dict.items() if isinstance(v, torch.Tensor) and v.ndim >= 3}
+
+            # images will be generated from the batch item selected above only, or from the single item given as `src`
+
+            if isinstance(src, dict):
+                for k, v in src.items():
+                    if isinstance(v, torch.Tensor) and v.ndim >= 4:
+                        image = image_fn(k, v[batch_selected_inst])
+
+                        # if we have images add each one separately to the map
                         if image is not None:
-                            imagemap[f"{label}_{i}"] = image
+                            for i, im in enumerate(image):
+                                imagemap[f"{k}_{i}"] = im
+
+            elif isinstance(src, torch.Tensor):
+                image = image_fn(label, src)
+                if image is not None:
+                    imagemap[f"{label}_{i}"] = image
 
     axes = plot_metric_images(fig, title, graphmap, imagemap, yscale, avg_keys, window_fraction)
 
diff --git a/monai/utils/module.py b/monai/utils/module.py
index 33314fb0e3..ef019a8955 100644
--- a/monai/utils/module.py
+++ b/monai/utils/module.py
@@ -364,17 +364,25 @@ def get_torch_version_tuple():
     Returns:
         tuple of ints represents the pytorch major/minor version.
     """
-    return tuple((int(x) for x in torch.__version__.split(".")[:2]))
+    return tuple(int(x) for x in torch.__version__.split(".")[:2])
 
 
-def version_leq(lhs, rhs):
-    """Returns True if version `lhs` is earlier or equal to `rhs`."""
+def version_leq(lhs: str, rhs: str):
+    """
+    Returns True if version `lhs` is earlier or equal to `rhs`.
+
+    Args:
+        lhs: version name to compare with `rhs`, return True if earlier or equal to `rhs`.
+        rhs: version name to compare with `lhs`, return True if later or equal to `lhs`.
+
+    """
 
+    lhs, rhs = str(lhs), str(rhs)
     ver, has_ver = optional_import("pkg_resources", name="parse_version")
     if has_ver:
         return ver(lhs) <= ver(rhs)
 
-    def _try_cast(val):
+    def _try_cast(val: str):
         val = val.strip()
         try:
             m = match("(\\d+)(.*)", val)
@@ -390,10 +398,10 @@ def _try_cast(val):
     rhs = rhs.split("+", 1)[0]
 
     # parse the version strings in this basic way without `packaging` package
-    lhs = map(_try_cast, lhs.split("."))
-    rhs = map(_try_cast, rhs.split("."))
+    lhs_ = map(_try_cast, lhs.split("."))
+    rhs_ = map(_try_cast, rhs.split("."))
 
-    for l, r in zip(lhs, rhs):
+    for l, r in zip(lhs_, rhs_):
         if l != r:
             if isinstance(l, int) and isinstance(r, int):
                 return l < r
diff --git a/monai/utils/profiling.py b/monai/utils/profiling.py
index 695653e897..d7459885fb 100644
--- a/monai/utils/profiling.py
+++ b/monai/utils/profiling.py
@@ -56,7 +56,7 @@ def wrapper(*args, **kwargs):
         cpu_time = torch.autograd.profiler.format_time(cpu_time)
         gpu_time = torch.autograd.profiler.format_time(gpu_time)
 
-        print("cpu time: {}, gpu time: {}".format(cpu_time, gpu_time), flush=True)
+        print(f"cpu time: {cpu_time}, gpu time: {gpu_time}", flush=True)
 
         return result
 
@@ -83,7 +83,7 @@ def wrapper(*args, **kwargs):
 
         total_time = (end - start) * 1e6
         total_time_str = torch.autograd.profiler.format_time(total_time)
-        print("end to end time: {}".format(total_time_str), flush=True)
+        print(f"end to end time: {total_time_str}", flush=True)
 
         return result
 
diff --git a/monai/utils/type_conversion.py b/monai/utils/type_conversion.py
index b0ce187e38..648e68440e 100644
--- a/monai/utils/type_conversion.py
+++ b/monai/utils/type_conversion.py
@@ -6,6 +6,7 @@
 
 from monai.config.type_definitions import DtypeLike, NdarrayOrTensor
 from monai.utils import optional_import
+from monai.utils.module import look_up_option
 
 cp, has_cp = optional_import("cupy")
 cp_ndarray, _ = optional_import("cupy", name="ndarray")
@@ -16,6 +17,7 @@
     "get_equivalent_dtype",
     "convert_data_type",
     "get_dtype",
+    "convert_to_cupy",
     "convert_to_numpy",
     "convert_to_tensor",
     "convert_to_dst_type",
@@ -40,31 +42,34 @@
 
 def dtype_torch_to_numpy(dtype):
     """Convert a torch dtype to its numpy equivalent."""
-    if dtype not in _torch_to_np_dtype:
-        raise ValueError(f"Unsupported torch to numpy dtype '{dtype}'.")
-    return _torch_to_np_dtype[dtype]
+    return look_up_option(dtype, _torch_to_np_dtype)
 
 
 def dtype_numpy_to_torch(dtype):
     """Convert a numpy dtype to its torch equivalent."""
     # np dtypes can be given as np.float32 and np.dtype(np.float32) so unify them
     dtype = np.dtype(dtype) if type(dtype) is type else dtype
-    if dtype not in _np_to_torch_dtype:
-        raise ValueError(f"Unsupported numpy to torch dtype '{dtype}'.")
-    return _np_to_torch_dtype[dtype]
+    return look_up_option(dtype, _np_to_torch_dtype)
 
 
 def get_equivalent_dtype(dtype, data_type):
     """Convert to the `dtype` that corresponds to `data_type`.
-    Example:
+
+    Example::
+
         im = torch.tensor(1)
         dtype = get_equivalent_dtype(np.float32, type(im))
+
     """
+    if dtype is None:
+        return None
     if data_type is torch.Tensor:
         if type(dtype) is torch.dtype:
+            # already a torch dtype and target `data_type` is torch.Tensor
             return dtype
         return dtype_numpy_to_torch(dtype)
     if type(dtype) is not torch.dtype:
+        # assuming the dtype is ok if it is not a torch dtype and target `data_type` is not torch.Tensor
         return dtype
     return dtype_torch_to_numpy(dtype)
 
@@ -83,7 +88,12 @@ def get_dtype(data: Any):
     return type(data)
 
 
-def convert_to_tensor(data, wrap_sequence: bool = False):
+def convert_to_tensor(
+    data,
+    dtype: Optional[torch.dtype] = None,
+    device: Optional[torch.device] = None,
+    wrap_sequence: bool = False,
+):
     """
     Utility to convert the input data to a PyTorch Tensor. If passing a dictionary, list or tuple,
     recursively check every item and convert it to PyTorch Tensor.
@@ -92,34 +102,41 @@ def convert_to_tensor(data, wrap_sequence: bool = False):
         data: input data can be PyTorch Tensor, numpy array, list, dictionary, int, float, bool, str, etc.
             will convert Tensor, Numpy array, float, int, bool to Tensors, strings and objects keep the original.
             for dictionary, list or tuple, convert every item to a Tensor if applicable.
-        wrap_sequence: if `False`, then lists will recursively call this function. E.g., `[1, 2]` -> `[tensor(1), tensor(2)]`.
-            If `True`, then `[1, 2]` -> `tensor([1, 2])`.
+        dtype: target data type to when converting to Tensor.
+        device: target device to put the converted Tensor data.
+        wrap_sequence: if `False`, then lists will recursively call this function.
+            E.g., `[1, 2]` -> `[tensor(1), tensor(2)]`. If `True`, then `[1, 2]` -> `tensor([1, 2])`.
 
     """
     if isinstance(data, torch.Tensor):
-        return data.contiguous()
+        return data.to(dtype=dtype, device=device, memory_format=torch.contiguous_format)  # type: ignore
     if isinstance(data, np.ndarray):
         # skip array of string classes and object, refer to:
         # https://github.com/pytorch/pytorch/blob/v1.9.0/torch/utils/data/_utils/collate.py#L13
         if re.search(r"[SaUO]", data.dtype.str) is None:
             # numpy array with 0 dims is also sequence iterable,
             # `ascontiguousarray` will add 1 dim if img has no dim, so we only apply on data with dims
-            return torch.as_tensor(data if data.ndim == 0 else np.ascontiguousarray(data))
-    elif isinstance(data, (float, int, bool)):
-        return torch.as_tensor(data)
-    elif isinstance(data, Sequence) and wrap_sequence:
-        return torch.as_tensor(data)
+            if data.ndim > 0:
+                data = np.ascontiguousarray(data)
+            return torch.as_tensor(data, dtype=dtype, device=device)  # type: ignore
+    elif (
+        has_cp
+        and isinstance(data, cp_ndarray)
+        or isinstance(data, (float, int, bool))
+        or (isinstance(data, Sequence) and wrap_sequence)
+    ):
+        return torch.as_tensor(data, dtype=dtype, device=device)  # type: ignore
     elif isinstance(data, list):
-        return [convert_to_tensor(i) for i in data]
+        return [convert_to_tensor(i, dtype=dtype, device=device) for i in data]
     elif isinstance(data, tuple):
-        return tuple(convert_to_tensor(i) for i in data)
+        return tuple(convert_to_tensor(i, dtype=dtype, device=device) for i in data)
     elif isinstance(data, dict):
-        return {k: convert_to_tensor(v) for k, v in data.items()}
+        return {k: convert_to_tensor(v, dtype=dtype, device=device) for k, v in data.items()}
 
     return data
 
 
-def convert_to_numpy(data, wrap_sequence: bool = False):
+def convert_to_numpy(data, dtype: Optional[DtypeLike] = None, wrap_sequence: bool = False):
     """
     Utility to convert the input data to a numpy array. If passing a dictionary, list or tuple,
     recursively check every item and convert it to numpy array.
@@ -128,23 +145,22 @@ def convert_to_numpy(data, wrap_sequence: bool = False):
         data: input data can be PyTorch Tensor, numpy array, list, dictionary, int, float, bool, str, etc.
             will convert Tensor, Numpy array, float, int, bool to numpy arrays, strings and objects keep the original.
             for dictionary, list or tuple, convert every item to a numpy array if applicable.
+        dtype: target data type when converting to numpy array.
         wrap_sequence: if `False`, then lists will recursively call this function. E.g., `[1, 2]` -> `[array(1), array(2)]`.
             If `True`, then `[1, 2]` -> `array([1, 2])`.
     """
     if isinstance(data, torch.Tensor):
-        data = data.detach().cpu().numpy()
+        data = data.detach().to(dtype=get_equivalent_dtype(dtype, torch.Tensor), device="cpu").numpy()
     elif has_cp and isinstance(data, cp_ndarray):
-        data = cp.asnumpy(data)
-    elif isinstance(data, (float, int, bool)):
-        data = np.asarray(data)
-    elif isinstance(data, Sequence) and wrap_sequence:
-        return np.asarray(data)
+        data = cp.asnumpy(data).astype(dtype)
+    elif isinstance(data, (np.ndarray, float, int, bool)) or (isinstance(data, Sequence) and wrap_sequence):
+        data = np.asarray(data, dtype=dtype)
     elif isinstance(data, list):
-        return [convert_to_numpy(i) for i in data]
+        return [convert_to_numpy(i, dtype=dtype) for i in data]
     elif isinstance(data, tuple):
-        return tuple(convert_to_numpy(i) for i in data)
+        return tuple(convert_to_numpy(i, dtype=dtype) for i in data)
     elif isinstance(data, dict):
-        return {k: convert_to_numpy(v) for k, v in data.items()}
+        return {k: convert_to_numpy(v, dtype=dtype) for k, v in data.items()}
 
     if isinstance(data, np.ndarray) and data.ndim > 0:
         data = np.ascontiguousarray(data)
@@ -152,6 +168,41 @@ def convert_to_numpy(data, wrap_sequence: bool = False):
     return data
 
 
+def convert_to_cupy(data, dtype, wrap_sequence: bool = True):
+    """
+    Utility to convert the input data to a cupy array. If passing a dictionary, list or tuple,
+    recursively check every item and convert it to cupy array.
+
+    Args:
+        data: input data can be PyTorch Tensor, numpy array, cupy array, list, dictionary, int, float, bool, str, etc.
+            Tensor, numpy array, cupy array, float, int, bool are converted to cupy arrays
+
+            for dictionary, list or tuple, convert every item to a numpy array if applicable.
+        dtype: target data type when converting to Cupy array.
+        wrap_sequence: if `False`, then lists will recursively call this function. E.g., `[1, 2]` -> `[array(1), array(2)]`.
+            If `True`, then `[1, 2]` -> `array([1, 2])`.
+    """
+
+    # direct calls
+    if isinstance(data, (cp_ndarray, np.ndarray, torch.Tensor, float, int, bool)) or (
+        isinstance(data, Sequence) and wrap_sequence
+    ):
+        data = cp.asarray(data, dtype)
+    elif isinstance(data, list):
+        return [convert_to_cupy(i, dtype) for i in data]
+    elif isinstance(data, tuple):
+        return tuple(convert_to_cupy(i, dtype) for i in data)
+    elif isinstance(data, dict):
+        return {k: convert_to_cupy(v, dtype) for k, v in data.items()}
+    # make it contiguous
+    if not isinstance(data, cp.ndarray):
+        raise ValueError(f"The input data type [{type(data)}] cannot be converted into cupy arrays!")
+
+    if data.ndim > 0:
+        data = cp.ascontiguousarray(data)
+    return data
+
+
 def convert_data_type(
     data: Any,
     output_type: Optional[type] = None,
@@ -170,12 +221,23 @@ def convert_data_type(
             If left blank, it remains unchanged.
     Returns:
         modified data, orig_type, orig_device
+
+    Note:
+        When both `output_type` and `dtype` are specified with different backend
+        (e.g., `torch.Tensor` and `np.float32`), the `output_type` will be used as the primary type,
+        for example::
+
+            >>> convert_data_type(1, torch.Tensor, dtype=np.float32)
+            (1.0, <class 'torch.Tensor'>, None)
+
     """
     orig_type: Any
     if isinstance(data, torch.Tensor):
         orig_type = torch.Tensor
     elif isinstance(data, np.ndarray):
         orig_type = np.ndarray
+    elif has_cp and isinstance(data, cp.ndarray):
+        orig_type = cp.ndarray
     else:
         orig_type = type(data)
 
@@ -183,33 +245,40 @@ def convert_data_type(
 
     output_type = output_type or orig_type
 
-    dtype = get_equivalent_dtype(dtype or get_dtype(data), output_type)
+    dtype_ = get_equivalent_dtype(dtype or get_dtype(data), output_type)
 
     if output_type is torch.Tensor:
-        if orig_type is not torch.Tensor:
-            data = convert_to_tensor(data)
-        if dtype != data.dtype:
-            data = data.to(dtype)
-        if device is not None:
-            data = data.to(device)
+        data = convert_to_tensor(data, dtype=dtype_, device=device)
     elif output_type is np.ndarray:
-        if orig_type is not np.ndarray:
-            data = convert_to_numpy(data)
-        if data is not None and dtype != data.dtype:
-            data = data.astype(dtype)
+        data = convert_to_numpy(data, dtype=dtype_)
+    elif has_cp and output_type is cp.ndarray:
+        data = convert_to_cupy(data, dtype=dtype_)
     else:
         raise ValueError(f"Unsupported output type: {output_type}")
     return data, orig_type, orig_device
 
 
-def convert_to_dst_type(src: Any, dst: NdarrayOrTensor) -> Tuple[NdarrayOrTensor, type, Optional[torch.device]]:
+def convert_to_dst_type(
+    src: Any, dst: NdarrayOrTensor, dtype: Optional[Union[DtypeLike, torch.dtype]] = None
+) -> Tuple[NdarrayOrTensor, type, Optional[torch.device]]:
     """
-    Convert `src` to the same `torch.Tensor`/`np.ndarray` and data type as `dst`.
+    If `dst` is an instance of `torch.Tensor` or its subclass, convert `src` to `torch.Tensor` with the same data type as `dst`,
+    if `dst` is an instance of `numpy.ndarray` or its subclass, convert to `numpy.ndarray` with the same data type as `dst`,
+    otherwise, convert to the type of `dst` directly.
+    `dtype` is an optional argument if the target `dtype` is different from the original `dst`'s data type.
 
     See Also:
         :func:`convert_data_type`
     """
-    device = None
+    device = dst.device if isinstance(dst, torch.Tensor) else None
+    if dtype is None:
+        dtype = dst.dtype
+
+    output_type: Any
     if isinstance(dst, torch.Tensor):
-        device = dst.device
-    return convert_data_type(data=src, output_type=type(dst), device=device, dtype=dst.dtype)
+        output_type = torch.Tensor
+    elif isinstance(dst, np.ndarray):
+        output_type = np.ndarray
+    else:
+        output_type = type(dst)
+    return convert_data_type(data=src, output_type=output_type, device=device, dtype=dtype)
diff --git a/monai/visualize/class_activation_maps.py b/monai/visualize/class_activation_maps.py
index 992eaecdac..6109d76a8a 100644
--- a/monai/visualize/class_activation_maps.py
+++ b/monai/visualize/class_activation_maps.py
@@ -137,6 +137,11 @@ def __call__(self, x, class_idx=None, retain_graph=False):
             self.score = self.class_score(logits, self.class_idx)
             self.model.zero_grad()
             self.score.sum().backward(retain_graph=retain_graph)
+            for layer in self.target_layers:
+                if layer not in self.gradients:
+                    raise RuntimeError(
+                        f"Backward hook for {layer} is not triggered; `requires_grad` of {layer} should be `True`."
+                    )
             grad = tuple(self.gradients[layer] for layer in self.target_layers)
         if train:
             self.model.train()
@@ -221,6 +226,8 @@ class CAM(CAMBase):
 
     .. code-block:: python
 
+        import torch
+
         # densenet 2d
         from monai.networks.nets import DenseNet121
         from monai.visualize import CAM
@@ -319,6 +326,8 @@ class GradCAM(CAMBase):
 
     .. code-block:: python
 
+        import torch
+
         # densenet 2d
         from monai.networks.nets import DenseNet121
         from monai.visualize import GradCAM
diff --git a/monai/visualize/img2tensorboard.py b/monai/visualize/img2tensorboard.py
index ccdbdc2396..fd6dc9483b 100644
--- a/monai/visualize/img2tensorboard.py
+++ b/monai/visualize/img2tensorboard.py
@@ -44,7 +44,7 @@ def _image3_animated_gif(tag: str, image: Union[np.ndarray, torch.Tensor], scale
     if len(image.shape) != 3:
         raise AssertionError("3D image tensors expected to be in `HWD` format, len(image.shape) != 3")
 
-    ims = [(np.asarray((image[:, :, i])) * scale_factor).astype(np.uint8) for i in range(image.shape[2])]
+    ims = [(np.asarray(image[:, :, i]) * scale_factor).astype(np.uint8) for i in range(image.shape[2])]
     ims = [GifImage.fromarray(im) for im in ims]
     img_str = b""
     for b_data in PIL.GifImagePlugin.getheader(ims[0])[0]:
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 785454ad5d..254cb06d27 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,6 +1,6 @@
 # Full requirements for developments
 -r requirements-min.txt
-pytorch-ignite==0.4.5
+pytorch-ignite==0.4.6
 gdown>=3.6.4
 scipy
 itk>=5.2
@@ -31,8 +31,11 @@ Sphinx==3.5.3
 recommonmark==0.6.0
 sphinx-autodoc-typehints==1.11.1
 sphinx-rtd-theme==0.5.2
-cucim~=0.19.0; platform_system == "Linux"
+cucim>=21.8.2; platform_system == "Linux"
 openslide-python==1.1.2
+imagecodecs; platform_system == "Linux"
+tifffile; platform_system == "Linux"
 pandas
 requests
 einops
+transformers
diff --git a/setup.cfg b/setup.cfg
index 6efe768a6f..19f04de526 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,16 +34,17 @@ all =
     pillow
     tensorboard
     gdown>=3.6.4
-    pytorch-ignite==0.4.5
+    pytorch-ignite==0.4.6
     torchvision
     itk>=5.2
     tqdm>=4.47.0
     lmdb
     psutil
-    cucim~=0.19.0
+    cucim>=21.8.2
     openslide-python==1.1.2
     pandas
     einops
+    transformers
 nibabel =
     nibabel
 skimage =
@@ -55,7 +56,7 @@ tensorboard =
 gdown =
     gdown>=3.6.4
 ignite =
-    pytorch-ignite==0.4.5
+    pytorch-ignite==0.4.6
 torchvision =
     torchvision
 itk =
@@ -67,22 +68,37 @@ lmdb =
 psutil =
     psutil
 cucim =
-    cucim~=0.19.0
+    cucim>=21.8.2
 openslide =
     openslide-python==1.1.2
 pandas =
     pandas
 einops =
     einops
+transformers =
+    transformers
 [flake8]
 select = B,C,E,F,N,P,T4,W,B9
 max_line_length = 120
 # C408 ignored because we like the dict keyword argument syntax
 # E501 is not flexible enough, we're using B950 instead
 ignore =
-    E203,E305,E402,E501,E721,E741,F821,F841,F999,W503,W504,C408,E302,W291,E303,
-    # N812 lowercase 'torch.nn.functional' imported as non lowercase 'F'
-    N812
+    E203
+    E305
+    E402
+    E501
+    E721
+    E741
+    F821
+    F841
+    F999
+    W503
+    W504
+    C408
+    E302
+    W291
+    E303
+    N812  # lowercase 'torch.nn.functional' imported as non lowercase 'F'
 per_file_ignores = __init__.py: F401
 exclude = *.pyi,.git,.eggs,monai/_version.py,versioneer.py,venv,.venv,_version.py
 
diff --git a/tests/clang_format_utils.py b/tests/clang_format_utils.py
index 41902eb272..1391fdcd47 100644
--- a/tests/clang_format_utils.py
+++ b/tests/clang_format_utils.py
@@ -50,10 +50,10 @@ def get_and_check_clang_format():
     """
     # If the host platform is not in PLATFORM_TO_HASH, it is unsupported.
     if HOST_PLATFORM not in PLATFORM_TO_HASH:
-        print("Unsupported platform: {}".format(HOST_PLATFORM))
+        print(f"Unsupported platform: {HOST_PLATFORM}")
         return False
     if HOST_PLATFORM not in PLATFORM_TO_CF_URL:
-        print("Unsupported platform: {}".format(HOST_PLATFORM))
+        print(f"Unsupported platform: {HOST_PLATFORM}")
         return False
 
     try:
@@ -69,7 +69,7 @@ def get_and_check_clang_format():
     mode = os.stat(CLANG_FORMAT_PATH).st_mode
     mode |= stat.S_IXUSR
     os.chmod(CLANG_FORMAT_PATH, mode)
-    print("Using clang-format located at {}".format(CLANG_FORMAT_PATH))
+    print(f"Using clang-format located at {CLANG_FORMAT_PATH}")
 
     return True
 
diff --git a/tests/min_tests.py b/tests/min_tests.py
index 5b376d7b57..5e188a828e 100644
--- a/tests/min_tests.py
+++ b/tests/min_tests.py
@@ -75,7 +75,6 @@ def run_testsuit():
         "test_handler_surface_distance",
         "test_handler_tb_image",
         "test_handler_tb_stats",
-        "test_handler_transform_inverter",
         "test_handler_validation",
         "test_hausdorff_distance",
         "test_header_correct",
@@ -140,6 +139,7 @@ def run_testsuit():
         "test_zoom",
         "test_zoom_affine",
         "test_zoomd",
+        "test_transchex",
     ]
     assert sorted(exclude_cases) == sorted(set(exclude_cases)), f"Duplicated items in {exclude_cases}"
 
diff --git a/tests/ngc_mmar_loading.py b/tests/ngc_mmar_loading.py
new file mode 100644
index 0000000000..c1ed22de5d
--- /dev/null
+++ b/tests/ngc_mmar_loading.py
@@ -0,0 +1,37 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import unittest
+
+import torch
+from parameterized import parameterized
+
+from monai.apps.mmars import MODEL_DESC, load_from_mmar
+from monai.config import print_debug_info
+
+
+class TestAllDownloadingMMAR(unittest.TestCase):
+    def setUp(self):
+        print_debug_info()
+        self.test_dir = "./"
+
+    @parameterized.expand((item,) for item in MODEL_DESC)
+    def test_loading_mmar(self, item):
+        pretrained_model = load_from_mmar(item=item, mmar_dir="./", map_location="cpu")
+        self.assertTrue(isinstance(pretrained_model, torch.nn.Module))
+
+    def tearDown(self):
+        print(os.listdir(self.test_dir))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_adjust_contrast.py b/tests/test_adjust_contrast.py
index 8e78698360..80ac61cfea 100644
--- a/tests/test_adjust_contrast.py
+++ b/tests/test_adjust_contrast.py
@@ -15,7 +15,7 @@
 from parameterized import parameterized
 
 from monai.transforms import AdjustContrast
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 TEST_CASE_1 = [1.0]
 
@@ -28,15 +28,16 @@ class TestAdjustContrast(NumpyImageTestCase2D):
     @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
     def test_correct_results(self, gamma):
         adjuster = AdjustContrast(gamma=gamma)
-        result = adjuster(self.imt)
-        if gamma == 1.0:
-            expected = self.imt
-        else:
-            epsilon = 1e-7
-            img_min = self.imt.min()
-            img_range = self.imt.max() - img_min
-            expected = np.power(((self.imt - img_min) / float(img_range + epsilon)), gamma) * img_range + img_min
-        np.testing.assert_allclose(expected, result, rtol=1e-05)
+        for p in TEST_NDARRAYS:
+            result = adjuster(p(self.imt))
+            if gamma == 1.0:
+                expected = self.imt
+            else:
+                epsilon = 1e-7
+                img_min = self.imt.min()
+                img_range = self.imt.max() - img_min
+                expected = np.power(((self.imt - img_min) / float(img_range + epsilon)), gamma) * img_range + img_min
+            assert_allclose(expected, result, rtol=1e-05, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_adjust_contrastd.py b/tests/test_adjust_contrastd.py
index 65647607e4..1e1c2cf8bc 100644
--- a/tests/test_adjust_contrastd.py
+++ b/tests/test_adjust_contrastd.py
@@ -15,7 +15,7 @@
 from parameterized import parameterized
 
 from monai.transforms import AdjustContrastd
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 TEST_CASE_1 = [1.0]
 
@@ -28,15 +28,16 @@ class TestAdjustContrastd(NumpyImageTestCase2D):
     @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
     def test_correct_results(self, gamma):
         adjuster = AdjustContrastd("img", gamma=gamma)
-        result = adjuster({"img": self.imt})
-        if gamma == 1.0:
-            expected = self.imt
-        else:
-            epsilon = 1e-7
-            img_min = self.imt.min()
-            img_range = self.imt.max() - img_min
-            expected = np.power(((self.imt - img_min) / float(img_range + epsilon)), gamma) * img_range + img_min
-        np.testing.assert_allclose(expected, result["img"], rtol=1e-05)
+        for p in TEST_NDARRAYS:
+            result = adjuster({"img": p(self.imt)})
+            if gamma == 1.0:
+                expected = self.imt
+            else:
+                epsilon = 1e-7
+                img_min = self.imt.min()
+                img_range = self.imt.max() - img_min
+                expected = np.power(((self.imt - img_min) / float(img_range + epsilon)), gamma) * img_range + img_min
+            assert_allclose(expected, result["img"], rtol=1e-05, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_affine.py b/tests/test_affine.py
index dd82d72e23..bd89f1a436 100644
--- a/tests/test_affine.py
+++ b/tests/test_affine.py
@@ -16,78 +16,139 @@
 from parameterized import parameterized
 
 from monai.transforms import Affine
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASES = [
-    [
-        dict(padding_mode="zeros", as_tensor_output=False, device=None),
-        {"img": np.arange(9).reshape((1, 3, 3)), "spatial_size": (-1, 0)},
-        np.arange(9).reshape(1, 3, 3),
-    ],
-    [
-        dict(padding_mode="zeros", as_tensor_output=False, device=None, image_only=True),
-        {"img": np.arange(9).reshape((1, 3, 3)), "spatial_size": (-1, 0)},
-        np.arange(9).reshape(1, 3, 3),
-    ],
-    [
-        dict(padding_mode="zeros", as_tensor_output=False, device=None),
-        {"img": np.arange(4).reshape((1, 2, 2))},
-        np.arange(4).reshape(1, 2, 2),
-    ],
-    [
-        dict(padding_mode="zeros", as_tensor_output=False, device=None),
-        {"img": np.arange(4).reshape((1, 2, 2)), "spatial_size": (4, 4)},
-        np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]]]),
-    ],
-    [
-        dict(rotate_params=[np.pi / 2], padding_mode="zeros", as_tensor_output=False, device=None),
-        {"img": np.arange(4).reshape((1, 2, 2)), "spatial_size": (4, 4)},
-        np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 2.0, 0.0, 0.0], [0.0, 3.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]]]),
-    ],
-    [
-        dict(padding_mode="zeros", as_tensor_output=False, device=None),
-        {"img": np.arange(27).reshape((1, 3, 3, 3)), "spatial_size": (-1, 0, 0)},
-        np.arange(27).reshape(1, 3, 3, 3),
-    ],
-    [
-        dict(padding_mode="zeros", as_tensor_output=False, device=None),
-        {"img": np.arange(8).reshape((1, 2, 2, 2)), "spatial_size": (4, 4, 4)},
-        np.array(
+TESTS = []
+for p in TEST_NDARRAYS:
+    for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]:
+        TESTS.append(
             [
-                [
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 4.0, 5.0, 0.0], [0.0, 6.0, 7.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                ]
+                dict(padding_mode="zeros", device=device),
+                {"img": p(np.arange(9).reshape((1, 3, 3))), "spatial_size": (-1, 0)},
+                p(np.arange(9).reshape(1, 3, 3)),
             ]
-        ),
-    ],
-    [
-        dict(rotate_params=[np.pi / 2], padding_mode="zeros", as_tensor_output=False, device=None),
-        {"img": np.arange(8).reshape((1, 2, 2, 2)), "spatial_size": (4, 4, 4)},
-        np.array(
+        )
+        TESTS.append(
             [
-                [
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 2.0, 0.0, 0.0], [0.0, 3.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 6.0, 4.0, 0.0], [0.0, 7.0, 5.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                ]
+                dict(padding_mode="zeros", device=device, image_only=True),
+                {"img": p(np.arange(9).reshape((1, 3, 3))), "spatial_size": (-1, 0)},
+                p(np.arange(9).reshape(1, 3, 3)),
             ]
-        ),
-    ],
-]
+        )
+        TESTS.append(
+            [
+                dict(padding_mode="zeros", device=device),
+                {"img": p(np.arange(4).reshape((1, 2, 2)))},
+                p(np.arange(4).reshape(1, 2, 2)),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(padding_mode="zeros", device=device),
+                {"img": p(np.arange(4).reshape((1, 2, 2))), "spatial_size": (4, 4)},
+                p(np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]]])),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(rotate_params=[np.pi / 2], padding_mode="zeros", device=device),
+                {"img": p(np.arange(4).reshape((1, 2, 2))), "spatial_size": (4, 4)},
+                p(np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 2.0, 0.0, 0.0], [0.0, 3.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]]])),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(padding_mode="zeros", device=device),
+                {"img": p(np.arange(27).reshape((1, 3, 3, 3))), "spatial_size": (-1, 0, 0)},
+                p(np.arange(27).reshape(1, 3, 3, 3)),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(padding_mode="zeros", device=device),
+                {"img": p(np.arange(8).reshape((1, 2, 2, 2))), "spatial_size": (4, 4, 4)},
+                p(
+                    np.array(
+                        [
+                            [
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 1.0, 0.0],
+                                    [0.0, 2.0, 3.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 4.0, 5.0, 0.0],
+                                    [0.0, 6.0, 7.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                            ]
+                        ]
+                    )
+                ),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(rotate_params=[np.pi / 2], padding_mode="zeros", device=device),
+                {"img": p(np.arange(8).reshape((1, 2, 2, 2))), "spatial_size": (4, 4, 4)},
+                p(
+                    np.array(
+                        [
+                            [
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 2.0, 0.0, 0.0],
+                                    [0.0, 3.0, 1.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 6.0, 4.0, 0.0],
+                                    [0.0, 7.0, 5.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                            ]
+                        ]
+                    )
+                ),
+            ]
+        )
 
 
 class TestAffine(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_affine(self, input_param, input_data, expected_val):
         g = Affine(**input_param)
         result = g(**input_data)
         if isinstance(result, tuple):
             result = result[0]
-        self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor))
-        np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4)
+        assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_affine_grid.py b/tests/test_affine_grid.py
index 24772b9a21..9bf2bcf90e 100644
--- a/tests/test_affine_grid.py
+++ b/tests/test_affine_grid.py
@@ -16,88 +16,108 @@
 from parameterized import parameterized
 
 from monai.transforms import AffineGrid
+from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env
 
-TEST_CASES = [
-    [
-        {"as_tensor_output": False, "device": torch.device("cpu:0")},
-        {"spatial_size": (2, 2)},
-        np.array([[[-0.5, -0.5], [0.5, 0.5]], [[-0.5, 0.5], [-0.5, 0.5]], [[1.0, 1.0], [1.0, 1.0]]]),
-    ],
-    [
-        {"as_tensor_output": True, "device": None},
-        {"spatial_size": (2, 2)},
-        torch.tensor([[[-0.5, -0.5], [0.5, 0.5]], [[-0.5, 0.5], [-0.5, 0.5]], [[1.0, 1.0], [1.0, 1.0]]]),
-    ],
-    [{"as_tensor_output": False, "device": None}, {"grid": np.ones((3, 3, 3))}, np.ones((3, 3, 3))],
-    [{"as_tensor_output": True, "device": torch.device("cpu:0")}, {"grid": np.ones((3, 3, 3))}, torch.ones((3, 3, 3))],
-    [{"as_tensor_output": False, "device": None}, {"grid": torch.ones((3, 3, 3))}, np.ones((3, 3, 3))],
-    [
-        {"as_tensor_output": True, "device": torch.device("cpu:0")},
-        {"grid": torch.ones((3, 3, 3))},
-        torch.ones((3, 3, 3)),
-    ],
-    [
-        {
-            "rotate_params": (1.0, 1.0),
-            "scale_params": (-20, 10),
-            "as_tensor_output": True,
-            "device": torch.device("cpu:0"),
-        },
-        {"grid": torch.ones((3, 3, 3))},
-        torch.tensor(
+TESTS = []
+for p in TEST_NDARRAYS:
+    for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]:
+        TESTS.append(
             [
-                [[-19.2208, -19.2208, -19.2208], [-19.2208, -19.2208, -19.2208], [-19.2208, -19.2208, -19.2208]],
-                [[-11.4264, -11.4264, -11.4264], [-11.4264, -11.4264, -11.4264], [-11.4264, -11.4264, -11.4264]],
-                [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
+                {"device": device},
+                {"spatial_size": (2, 2)},
+                np.array([[[-0.5, -0.5], [0.5, 0.5]], [[-0.5, 0.5], [-0.5, 0.5]], [[1.0, 1.0], [1.0, 1.0]]]),
             ]
-        ),
-    ],
-    [
-        {
-            "rotate_params": (1.0, 1.0, 1.0),
-            "scale_params": (-20, 10),
-            "as_tensor_output": True,
-            "device": torch.device("cpu:0"),
-        },
-        {"grid": torch.ones((4, 3, 3, 3))},
-        torch.tensor(
+        )
+
+        TESTS.append([{"device": device}, {"grid": p(np.ones((3, 3, 3)))}, p(np.ones((3, 3, 3)))])
+        TESTS.append([{"device": device}, {"grid": p(torch.ones((3, 3, 3)))}, p(np.ones((3, 3, 3)))])
+        TESTS.append(
+            [
+                {
+                    "rotate_params": (1.0, 1.0),
+                    "scale_params": (-20, 10),
+                    "device": device,
+                },
+                {"grid": p(torch.ones((3, 3, 3)))},
+                p(
+                    torch.tensor(
+                        [
+                            [
+                                [-19.2208, -19.2208, -19.2208],
+                                [-19.2208, -19.2208, -19.2208],
+                                [-19.2208, -19.2208, -19.2208],
+                            ],
+                            [
+                                [-11.4264, -11.4264, -11.4264],
+                                [-11.4264, -11.4264, -11.4264],
+                                [-11.4264, -11.4264, -11.4264],
+                            ],
+                            [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
+                        ]
+                    )
+                ),
+            ]
+        )
+        TESTS.append(
             [
-                [
-                    [[-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435]],
-                    [[-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435]],
-                    [[-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435]],
-                ],
-                [
-                    [[-20.2381, -20.2381, -20.2381], [-20.2381, -20.2381, -20.2381], [-20.2381, -20.2381, -20.2381]],
-                    [[-20.2381, -20.2381, -20.2381], [-20.2381, -20.2381, -20.2381], [-20.2381, -20.2381, -20.2381]],
-                    [[-20.2381, -20.2381, -20.2381], [-20.2381, -20.2381, -20.2381], [-20.2381, -20.2381, -20.2381]],
-                ],
-                [
-                    [[-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844]],
-                    [[-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844]],
-                    [[-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844]],
-                ],
-                [
-                    [[1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000]],
-                    [[1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000]],
-                    [[1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000]],
-                ],
+                {
+                    "rotate_params": (1.0, 1.0, 1.0),
+                    "scale_params": (-20, 10),
+                    "device": device,
+                },
+                {"grid": p(torch.ones((4, 3, 3, 3)))},
+                p(
+                    torch.tensor(
+                        [
+                            [
+                                [[-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435]],
+                                [[-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435]],
+                                [[-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435], [-9.5435, -9.5435, -9.5435]],
+                            ],
+                            [
+                                [
+                                    [-20.2381, -20.2381, -20.2381],
+                                    [-20.2381, -20.2381, -20.2381],
+                                    [-20.2381, -20.2381, -20.2381],
+                                ],
+                                [
+                                    [-20.2381, -20.2381, -20.2381],
+                                    [-20.2381, -20.2381, -20.2381],
+                                    [-20.2381, -20.2381, -20.2381],
+                                ],
+                                [
+                                    [-20.2381, -20.2381, -20.2381],
+                                    [-20.2381, -20.2381, -20.2381],
+                                    [-20.2381, -20.2381, -20.2381],
+                                ],
+                            ],
+                            [
+                                [[-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844]],
+                                [[-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844]],
+                                [[-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844], [-0.5844, -0.5844, -0.5844]],
+                            ],
+                            [
+                                [[1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000]],
+                                [[1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000]],
+                                [[1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000], [1.0000, 1.0000, 1.0000]],
+                            ],
+                        ]
+                    )
+                ),
             ]
-        ),
-    ],
-]
+        )
+
+_rtol = 5e-2 if is_tf32_env() else 1e-4
 
 
 class TestAffineGrid(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_affine_grid(self, input_param, input_data, expected_val):
         g = AffineGrid(**input_param)
         result, _ = g(**input_data)
-        self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor))
-        if isinstance(result, torch.Tensor):
-            np.testing.assert_allclose(result.cpu().numpy(), expected_val.cpu().numpy(), rtol=1e-4, atol=1e-4)
-        else:
-            np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4)
+        if "device" in input_data:
+            self.assertEqual(result.device, input_data[device])
+        assert_allclose(result, expected_val, type_test=False, rtol=_rtol)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_affine_transform.py b/tests/test_affine_transform.py
index 42af58be73..ef39c297ce 100644
--- a/tests/test_affine_transform.py
+++ b/tests/test_affine_transform.py
@@ -17,6 +17,9 @@
 
 from monai.networks import normalize_transform, to_norm_affine
 from monai.networks.layers import AffineTransform
+from tests.utils import is_tf32_env
+
+_rtol = 1e-4 if not is_tf32_env() else 5e-3
 
 TEST_NORM_CASES = [
     [(4, 5), True, [[[0.666667, 0, -1], [0, 0.5, -1], [0, 0, 1]]]],
@@ -95,7 +98,7 @@ def test_to_norm_affine(self, affine, src_size, dst_size, align_corners, expecte
             affine = torch.as_tensor(affine, device=torch.device("cuda:0"), dtype=torch.float32)
             new_affine = to_norm_affine(affine, src_size, dst_size, align_corners)
             new_affine = new_affine.detach().cpu().numpy()
-            np.testing.assert_allclose(new_affine, expected, atol=1e-4)
+            np.testing.assert_allclose(new_affine, expected, atol=1e-5, rtol=_rtol)
 
     @parameterized.expand(TEST_ILL_TO_NORM_AFFINE_CASES)
     def test_to_norm_affine_ill(self, affine, src_size, dst_size, align_corners):
@@ -113,7 +116,7 @@ def test_affine_shift(self):
         out = AffineTransform()(image, affine)
         out = out.detach().cpu().numpy()
         expected = [[[[0, 4, 1, 3], [0, 7, 6, 8], [0, 3, 5, 3]]]]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol)
 
     def test_affine_shift_1(self):
         affine = torch.as_tensor([[1.0, 0.0, -1.0], [0.0, 1.0, -1.0]])
@@ -121,7 +124,7 @@ def test_affine_shift_1(self):
         out = AffineTransform()(image, affine)
         out = out.detach().cpu().numpy()
         expected = [[[[0, 0, 0, 0], [0, 4, 1, 3], [0, 7, 6, 8]]]]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol)
 
     def test_affine_shift_2(self):
         affine = torch.as_tensor([[1.0, 0.0, -1.0], [0.0, 1.0, 0.0]])
@@ -129,28 +132,28 @@ def test_affine_shift_2(self):
         out = AffineTransform()(image, affine)
         out = out.detach().cpu().numpy()
         expected = [[[[0, 0, 0, 0], [4, 1, 3, 2], [7, 6, 8, 5]]]]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol)
 
     def test_zoom(self):
         affine = torch.as_tensor([[1.0, 0.0, 0.0], [0.0, 2.0, 0.0]])
         image = torch.arange(1.0, 13.0).view(1, 1, 3, 4).to(device=torch.device("cpu:0"))
         out = AffineTransform((3, 2))(image, affine)
         expected = [[[[1, 3], [5, 7], [9, 11]]]]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol)
 
     def test_zoom_1(self):
         affine = torch.as_tensor([[2.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
         image = torch.arange(1.0, 13.0).view(1, 1, 3, 4).to(device=torch.device("cpu:0"))
         out = AffineTransform()(image, affine, (1, 4))
         expected = [[[[1, 2, 3, 4]]]]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=_rtol)
 
     def test_zoom_2(self):
         affine = torch.as_tensor([[2.0, 0.0, 0.0], [0.0, 2.0, 0.0]], dtype=torch.float32)
         image = torch.arange(1.0, 13.0).view(1, 1, 3, 4).to(device=torch.device("cpu:0"))
         out = AffineTransform((1, 2))(image, affine)
         expected = [[[[1, 3]]]]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=1e-5, rtol=_rtol)
 
     def test_affine_transform_minimum(self):
         t = np.pi / 3
@@ -169,7 +172,7 @@ def test_affine_transform_minimum(self):
                 ]
             ]
         ]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=1e-3, rtol=_rtol)
 
     def test_affine_transform_2d(self):
         t = np.pi / 3
@@ -188,7 +191,7 @@ def test_affine_transform_2d(self):
                 ]
             ]
         ]
-        np.testing.assert_allclose(out, expected, atol=1e-5)
+        np.testing.assert_allclose(out, expected, atol=1e-3, rtol=_rtol)
 
         if torch.cuda.is_available():
             affine = torch.as_tensor(affine, device=torch.device("cuda:0"), dtype=torch.float32)
@@ -205,7 +208,7 @@ def test_affine_transform_2d(self):
                     ]
                 ]
             ]
-            np.testing.assert_allclose(out, expected, atol=1e-4)
+            np.testing.assert_allclose(out, expected, atol=5e-3)
 
     def test_affine_transform_3d(self):
         t = np.pi / 3
@@ -231,7 +234,7 @@ def test_affine_transform_3d(self):
                 ]
             ],
         ]
-        np.testing.assert_allclose(out, expected, atol=1e-4)
+        np.testing.assert_allclose(out, expected, atol=1e-4, rtol=_rtol)
 
         if torch.cuda.is_available():
             affine = torch.as_tensor(affine, device=torch.device("cuda:0"), dtype=torch.float32)
@@ -255,7 +258,7 @@ def test_affine_transform_3d(self):
                     ]
                 ],
             ]
-            np.testing.assert_allclose(out, expected, atol=1e-4)
+            np.testing.assert_allclose(out, expected, atol=5e-3)
 
     def test_ill_affine_transform(self):
         with self.assertRaises(ValueError):  # image too small
diff --git a/tests/test_affined.py b/tests/test_affined.py
index 850f12905d..142cedc8d9 100644
--- a/tests/test_affined.py
+++ b/tests/test_affined.py
@@ -16,85 +16,142 @@
 from parameterized import parameterized
 
 from monai.transforms import Affined
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASES = [
-    [
-        dict(keys="img", padding_mode="zeros", as_tensor_output=False, spatial_size=(-1, 0), device=None),
-        {"img": np.arange(9).reshape((1, 3, 3))},
-        np.arange(9).reshape(1, 3, 3),
-    ],
-    [
-        dict(keys="img", padding_mode="zeros", as_tensor_output=False, device=None),
-        {"img": np.arange(4).reshape((1, 2, 2))},
-        np.arange(4).reshape(1, 2, 2),
-    ],
-    [
-        dict(keys="img", padding_mode="zeros", spatial_size=(4, 4), as_tensor_output=False, device=None),
-        {"img": np.arange(4).reshape((1, 2, 2))},
-        np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]]]),
-    ],
-    [
-        dict(
-            keys="img",
-            rotate_params=[np.pi / 2],
-            padding_mode="zeros",
-            spatial_size=(4, 4),
-            as_tensor_output=False,
-            device=None,
-        ),
-        {"img": np.arange(4).reshape((1, 2, 2))},
-        np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 2.0, 0.0, 0.0], [0.0, 3.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]]]),
-    ],
-    [
-        dict(keys="img", padding_mode="zeros", spatial_size=(-1, 0, 0), as_tensor_output=False, device=None),
-        {"img": np.arange(27).reshape((1, 3, 3, 3))},
-        np.arange(27).reshape(1, 3, 3, 3),
-    ],
-    [
-        dict(keys="img", padding_mode="zeros", spatial_size=(4, 4, 4), as_tensor_output=False, device=None),
-        {"img": np.arange(8).reshape((1, 2, 2, 2))},
-        np.array(
+TESTS = []
+for p in TEST_NDARRAYS:
+    for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]:
+        TESTS.append(
             [
-                [
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 4.0, 5.0, 0.0], [0.0, 6.0, 7.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                ]
+                dict(keys="img", padding_mode="zeros", spatial_size=(-1, 0), device=device),
+                {"img": p(np.arange(9).reshape((1, 3, 3)))},
+                p(np.arange(9).reshape(1, 3, 3)),
             ]
-        ),
-    ],
-    [
-        dict(
-            keys="img",
-            rotate_params=[np.pi / 2],
-            padding_mode="zeros",
-            spatial_size=(4, 4, 4),
-            as_tensor_output=False,
-            device=None,
-        ),
-        {"img": np.arange(8).reshape((1, 2, 2, 2))},
-        np.array(
+        )
+        TESTS.append(
             [
-                [
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 2.0, 0.0, 0.0], [0.0, 3.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 6.0, 4.0, 0.0], [0.0, 7.0, 5.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                ]
+                dict(keys="img", padding_mode="zeros", device=device),
+                {"img": p(np.arange(4).reshape((1, 2, 2)))},
+                p(np.arange(4).reshape(1, 2, 2)),
             ]
-        ),
-    ],
-]
+        )
+        TESTS.append(
+            [
+                dict(keys="img", padding_mode="zeros", spatial_size=(4, 4), device=device),
+                {"img": p(np.arange(4).reshape((1, 2, 2)))},
+                p(np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]]])),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(
+                    keys="img",
+                    rotate_params=[np.pi / 2],
+                    padding_mode="zeros",
+                    spatial_size=(4, 4),
+                    device=device,
+                ),
+                {"img": p(np.arange(4).reshape((1, 2, 2)))},
+                p(np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 2.0, 0.0, 0.0], [0.0, 3.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]]])),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(keys="img", padding_mode="zeros", spatial_size=(-1, 0, 0), device=device),
+                {"img": p(np.arange(27).reshape((1, 3, 3, 3)))},
+                p(np.arange(27).reshape(1, 3, 3, 3)),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(keys="img", padding_mode="zeros", spatial_size=(4, 4, 4), device=device),
+                {"img": p(np.arange(8).reshape((1, 2, 2, 2)))},
+                p(
+                    np.array(
+                        [
+                            [
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 1.0, 0.0],
+                                    [0.0, 2.0, 3.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 4.0, 5.0, 0.0],
+                                    [0.0, 6.0, 7.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                            ]
+                        ]
+                    )
+                ),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(
+                    keys="img",
+                    rotate_params=[np.pi / 2],
+                    padding_mode="zeros",
+                    spatial_size=(4, 4, 4),
+                    device=device,
+                ),
+                {"img": p(np.arange(8).reshape((1, 2, 2, 2)))},
+                p(
+                    np.array(
+                        [
+                            [
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 2.0, 0.0, 0.0],
+                                    [0.0, 3.0, 1.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 6.0, 4.0, 0.0],
+                                    [0.0, 7.0, 5.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                                [
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                    [0.0, 0.0, 0.0, 0.0],
+                                ],
+                            ]
+                        ]
+                    )
+                ),
+            ]
+        )
 
 
 class TestAffined(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_affine(self, input_param, input_data, expected_val):
         g = Affined(**input_param)
         result = g(input_data)["img"]
-        self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor))
-        np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4)
+        assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_as_channel_first.py b/tests/test_as_channel_first.py
index 0d1b1c7d3a..918e576011 100644
--- a/tests/test_as_channel_first.py
+++ b/tests/test_as_channel_first.py
@@ -34,7 +34,7 @@ def test_value(self, in_type, input_param, expected_shape):
         if isinstance(test_data, torch.Tensor):
             test_data = test_data.cpu().numpy()
         expected = np.moveaxis(test_data, input_param["channel_dim"], 0)
-        assert_allclose(expected, result)
+        assert_allclose(result, expected, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_autoencoder.py b/tests/test_autoencoder.py
index 54d6832c8d..d67964f034 100644
--- a/tests/test_autoencoder.py
+++ b/tests/test_autoencoder.py
@@ -92,7 +92,7 @@ def test_shape(self, input_param, input_shape, expected_shape):
             self.assertEqual(result.shape, expected_shape)
 
     def test_script(self):
-        net = AutoEncoder(dimensions=2, in_channels=1, out_channels=1, channels=(4, 8), strides=(2, 2))
+        net = AutoEncoder(spatial_dims=2, in_channels=1, out_channels=1, channels=(4, 8), strides=(2, 2))
         test_data = torch.randn(2, 1, 32, 32)
         test_script_save(net, test_data)
 
diff --git a/tests/test_basic_unet.py b/tests/test_basic_unet.py
index 09d7f72d0e..1e553b363b 100644
--- a/tests/test_basic_unet.py
+++ b/tests/test_basic_unet.py
@@ -101,7 +101,7 @@ def test_shape(self, input_param, input_shape, expected_shape):
         self.assertEqual(result.shape, expected_shape)
 
     def test_script(self):
-        net = BasicUNet(dimensions=2, in_channels=1, out_channels=3)
+        net = BasicUNet(spatial_dims=2, in_channels=1, out_channels=3)
         test_data = torch.randn(16, 1, 32, 32)
         test_script_save(net, test_data)
 
diff --git a/tests/test_cachedataset.py b/tests/test_cachedataset.py
index bbb8143631..e5bb1b9a90 100644
--- a/tests/test_cachedataset.py
+++ b/tests/test_cachedataset.py
@@ -19,7 +19,7 @@
 from parameterized import parameterized
 
 from monai.data import CacheDataset, DataLoader, PersistentDataset, SmartCacheDataset
-from monai.transforms import Compose, Lambda, LoadImaged, ThreadUnsafe, Transform
+from monai.transforms import Compose, Lambda, LoadImaged, RandLambda, ThreadUnsafe, Transform
 from monai.utils import get_torch_version_tuple
 
 TEST_CASE_1 = [Compose([LoadImaged(keys=["image", "label", "extra"])]), (128, 128, 128)]
@@ -84,7 +84,12 @@ def test_shape(self, transform, expected_shape):
     def test_set_data(self):
         data_list1 = list(range(10))
 
-        transform = Lambda(func=lambda x: np.array([x * 10]))
+        transform = Compose(
+            [
+                Lambda(func=lambda x: np.array([x * 10])),
+                RandLambda(func=lambda x: x + 1),
+            ]
+        )
 
         dataset = CacheDataset(
             data=data_list1,
@@ -92,19 +97,23 @@ def test_set_data(self):
             cache_rate=1.0,
             num_workers=4,
             progress=True,
+            copy_cache=False if sys.platform == "linux" else True,
         )
 
         num_workers = 2 if sys.platform == "linux" else 0
         dataloader = DataLoader(dataset=dataset, num_workers=num_workers, batch_size=1)
         for i, d in enumerate(dataloader):
-            np.testing.assert_allclose([[data_list1[i] * 10]], d)
+            np.testing.assert_allclose([[data_list1[i] * 10 + 1]], d)
+        # simulate another epoch, the cache content should not be modified
+        for i, d in enumerate(dataloader):
+            np.testing.assert_allclose([[data_list1[i] * 10 + 1]], d)
 
         # update the datalist and fill the cache content
         data_list2 = list(range(-10, 0))
         dataset.set_data(data=data_list2)
         # rerun with updated cache content
         for i, d in enumerate(dataloader):
-            np.testing.assert_allclose([[data_list2[i] * 10]], d)
+            np.testing.assert_allclose([[data_list2[i] * 10 + 1]], d)
 
 
 class _StatefulTransform(Transform, ThreadUnsafe):
diff --git a/tests/test_cast_to_type.py b/tests/test_cast_to_type.py
index 0ef25cbafa..d06efb17b5 100644
--- a/tests/test_cast_to_type.py
+++ b/tests/test_cast_to_type.py
@@ -16,14 +16,23 @@
 from parameterized import parameterized
 
 from monai.transforms import CastToType
+from monai.utils import optional_import
 from monai.utils.type_conversion import get_equivalent_dtype
 from tests.utils import TEST_NDARRAYS
 
+cp, has_cp = optional_import("cupy")
+
 TESTS = []
 for p in TEST_NDARRAYS:
     for out_dtype in (np.float64, torch.float64):
         TESTS.append([out_dtype, p(np.array([[0, 1], [1, 2]], dtype=np.float32)), out_dtype])
 
+TESTS_CUPY = [
+    [np.float32, np.array([[0, 1], [1, 2]], dtype=np.float32), np.float32],
+    [np.float32, np.array([[0, 1], [1, 2]], dtype=np.uint8), np.float32],
+    [np.uint8, np.array([[0, 1], [1, 2]], dtype=np.float32), np.uint8],
+]
+
 
 class TestCastToType(unittest.TestCase):
     @parameterized.expand(TESTS)
@@ -35,6 +44,19 @@ def test_type(self, out_dtype, input_data, expected_type):
         result = CastToType()(input_data, out_dtype)
         self.assertEqual(result.dtype, get_equivalent_dtype(expected_type, type(result)))
 
+    @parameterized.expand(TESTS_CUPY)
+    @unittest.skipUnless(has_cp, "Requires CuPy")
+    def test_type_cupy(self, out_dtype, input_data, expected_type):
+        input_data = cp.asarray(input_data)
+
+        result = CastToType(dtype=out_dtype)(input_data)
+        self.assertTrue(isinstance(result, cp.ndarray))
+        self.assertEqual(result.dtype, get_equivalent_dtype(expected_type, type(result)))
+
+        result = CastToType()(input_data, out_dtype)
+        self.assertTrue(isinstance(result, cp.ndarray))
+        self.assertEqual(result.dtype, get_equivalent_dtype(expected_type, type(result)))
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_cast_to_typed.py b/tests/test_cast_to_typed.py
index be495564fb..342a677ce1 100644
--- a/tests/test_cast_to_typed.py
+++ b/tests/test_cast_to_typed.py
@@ -16,6 +16,9 @@
 from parameterized import parameterized
 
 from monai.transforms import CastToTyped
+from monai.utils import optional_import
+
+cp, has_cp = optional_import("cupy")
 
 TEST_CASE_1 = [
     {"keys": ["img"], "dtype": np.float64},
@@ -33,6 +36,26 @@
 ]
 
 
+TESTS_CUPY = [
+    [
+        {"keys": "image", "dtype": np.uint8},
+        {
+            "image": np.array([[0, 1], [1, 2]], dtype=np.float32),
+            "label": np.array([[0, 1], [1, 1]], dtype=np.float32),
+        },
+        {"image": np.uint8, "label": np.float32},
+    ],
+    [
+        {"keys": ["image", "label"], "dtype": np.float32},
+        {
+            "image": np.array([[0, 1], [1, 2]], dtype=np.uint8),
+            "label": np.array([[0, 1], [1, 1]], dtype=np.uint8),
+        },
+        {"image": np.float32, "label": np.float32},
+    ],
+]
+
+
 class TestCastToTyped(unittest.TestCase):
     @parameterized.expand([TEST_CASE_1, TEST_CASE_2])
     def test_type(self, input_param, input_data, expected_type):
@@ -40,6 +63,16 @@ def test_type(self, input_param, input_data, expected_type):
         for k, v in result.items():
             self.assertEqual(v.dtype, expected_type[k])
 
+    @parameterized.expand(TESTS_CUPY)
+    @unittest.skipUnless(has_cp, "Requires CuPy")
+    def test_type_cupy(self, input_param, input_data, expected_type):
+        input_data = {k: cp.asarray(v) for k, v in input_data.items()}
+
+        result = CastToTyped(**input_param)(input_data)
+        for k, v in result.items():
+            self.assertTrue(isinstance(v, cp.ndarray))
+            self.assertEqual(v.dtype, expected_type[k])
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_center_scale_crop.py b/tests/test_center_scale_crop.py
index e28849ce90..4c5bfc4fac 100644
--- a/tests/test_center_scale_crop.py
+++ b/tests/test_center_scale_crop.py
@@ -38,11 +38,13 @@ class TestCenterScaleCrop(unittest.TestCase):
     @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_3])
     def test_shape(self, input_param, input_data, expected_shape):
         result = CenterScaleCrop(**input_param)(input_data)
+        self.assertEqual(isinstance(result, torch.Tensor), isinstance(input_data, torch.Tensor))
         np.testing.assert_allclose(result.shape, expected_shape)
 
     @parameterized.expand([TEST_CASE_2])
     def test_value(self, input_param, input_data, expected_value):
         result = CenterScaleCrop(**input_param)(input_data)
+        self.assertEqual(isinstance(result, torch.Tensor), isinstance(input_data, torch.Tensor))
         np.testing.assert_allclose(result, expected_value)
 
 
diff --git a/tests/test_center_spatial_crop.py b/tests/test_center_spatial_crop.py
index 3e828176a5..d6a7edb305 100644
--- a/tests/test_center_spatial_crop.py
+++ b/tests/test_center_spatial_crop.py
@@ -38,11 +38,13 @@ class TestCenterSpatialCrop(unittest.TestCase):
     @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_3])
     def test_shape(self, input_param, input_data, expected_shape):
         result = CenterSpatialCrop(**input_param)(input_data)
+        self.assertEqual(isinstance(result, torch.Tensor), isinstance(input_data, torch.Tensor))
         np.testing.assert_allclose(result.shape, expected_shape)
 
     @parameterized.expand([TEST_CASE_2])
     def test_value(self, input_param, input_data, expected_value):
         result = CenterSpatialCrop(**input_param)(input_data)
+        self.assertEqual(isinstance(result, torch.Tensor), isinstance(input_data, torch.Tensor))
         np.testing.assert_allclose(result, expected_value)
 
 
diff --git a/tests/test_center_spatial_cropd.py b/tests/test_center_spatial_cropd.py
index 349253ab56..8ffcdf4387 100644
--- a/tests/test_center_spatial_cropd.py
+++ b/tests/test_center_spatial_cropd.py
@@ -15,36 +15,51 @@
 from parameterized import parameterized
 
 from monai.transforms import CenterSpatialCropd
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_0 = [
-    {"keys": "img", "roi_size": [2, -1, -1]},
-    {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])},
-    (3, 2, 3, 3),
-]
+TEST_SHAPES = []
+for p in TEST_NDARRAYS:
+    TEST_SHAPES.append(
+        [
+            {"keys": "img", "roi_size": [2, -1, -1]},
+            {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))},
+            (3, 2, 3, 3),
+        ]
+    )
 
-TEST_CASE_1 = [
-    {"keys": "img", "roi_size": [2, 2, 2]},
-    {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])},
-    (3, 2, 2, 2),
-]
+    TEST_SHAPES.append(
+        [
+            {"keys": "img", "roi_size": [2, 2, 2]},
+            {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))},
+            (3, 2, 2, 2),
+        ]
+    )
 
-TEST_CASE_2 = [
-    {"keys": "img", "roi_size": [2, 2]},
-    {"img": np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]])},
-    np.array([[[1, 2], [2, 3]]]),
-]
+TEST_CASES = []
+for p in TEST_NDARRAYS:
+    TEST_CASES.append(
+        [
+            {"keys": "img", "roi_size": [2, 2]},
+            {
+                "img": p(
+                    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]])
+                )
+            },
+            p(np.array([[[1, 2], [2, 3]]])),
+        ]
+    )
 
 
 class TestCenterSpatialCropd(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_0, TEST_CASE_1])
+    @parameterized.expand(TEST_SHAPES)
     def test_shape(self, input_param, input_data, expected_shape):
         result = CenterSpatialCropd(**input_param)(input_data)
         self.assertTupleEqual(result["img"].shape, expected_shape)
 
-    @parameterized.expand([TEST_CASE_2])
+    @parameterized.expand(TEST_CASES)
     def test_value(self, input_param, input_data, expected_value):
         result = CenterSpatialCropd(**input_param)(input_data)
-        np.testing.assert_allclose(result["img"], expected_value)
+        assert_allclose(result["img"], expected_value, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_classes_to_indices.py b/tests/test_classes_to_indices.py
index 0ba3dd094a..7c89e3179d 100644
--- a/tests/test_classes_to_indices.py
+++ b/tests/test_classes_to_indices.py
@@ -11,68 +11,80 @@
 
 import unittest
 
-import numpy as np
 from parameterized import parameterized
 
 from monai.transforms import ClassesToIndices
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [
-    # test Argmax data
-    {"num_classes": 3, "image_threshold": 0.0},
-    np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]),
-    None,
-    [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7])],
-]
+TESTS_CASES = []
+for p in TEST_NDARRAYS:
+    TESTS_CASES.append(
+        [
+            # test Argmax data
+            {"num_classes": 3, "image_threshold": 0.0},
+            p([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]),
+            None,
+            [p([0, 4, 8]), p([1, 5, 6]), p([2, 3, 7])],
+        ]
+    )
 
-TEST_CASE_2 = [
-    {"num_classes": 3, "image_threshold": 60},
-    np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]),
-    np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]),
-    [np.array([0, 8]), np.array([1, 5, 6]), np.array([3])],
-]
+    TESTS_CASES.append(
+        [
+            {"num_classes": 3, "image_threshold": 60},
+            p([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]),
+            p([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]),
+            [p([0, 8]), p([1, 5, 6]), p([3])],
+        ]
+    )
 
-TEST_CASE_3 = [
-    # test One-Hot data
-    {"image_threshold": 0.0},
-    np.array(
+    TESTS_CASES.append(
         [
-            [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
-            [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
-            [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
+            # test One-Hot data
+            {"image_threshold": 0.0},
+            p(
+                [
+                    [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                    [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
+                    [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
+                ]
+            ),
+            None,
+            [p([0, 4, 8]), p([1, 5, 6]), p([2, 3, 7])],
         ]
-    ),
-    None,
-    [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7])],
-]
+    )
 
-TEST_CASE_4 = [
-    {"num_classes": None, "image_threshold": 60},
-    np.array(
+    TESTS_CASES.append(
         [
-            [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
-            [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
-            [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
+            {"num_classes": None, "image_threshold": 60},
+            p(
+                [
+                    [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                    [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
+                    [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
+                ]
+            ),
+            p([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]),
+            [p([0, 8]), p([1, 5, 6]), p([3])],
         ]
-    ),
-    np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]),
-    [np.array([0, 8]), np.array([1, 5, 6]), np.array([3])],
-]
+    )
 
-TEST_CASE_5 = [
-    # test output_shape
-    {"num_classes": 3, "image_threshold": 0.0, "output_shape": [3, 3]},
-    np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]),
-    None,
-    [np.array([[0, 0], [1, 1], [2, 2]]), np.array([[0, 1], [1, 2], [2, 0]]), np.array([[0, 2], [1, 0], [2, 1]])],
-]
+    TESTS_CASES.append(
+        [
+            # test output_shape
+            {"num_classes": 3, "image_threshold": 0.0, "output_shape": [3, 3]},
+            p([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]),
+            None,
+            [p([[0, 0], [1, 1], [2, 2]]), p([[0, 1], [1, 2], [2, 0]]), p([[0, 2], [1, 0], [2, 1]])],
+        ]
+    )
 
 
 class TestClassesToIndices(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5])
+    @parameterized.expand(TESTS_CASES)
     def test_value(self, input_args, label, image, expected_indices):
         indices = ClassesToIndices(**input_args)(label, image)
         for i, e in zip(indices, expected_indices):
-            np.testing.assert_allclose(i, e)
+            assert_allclose(i, e)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_classes_to_indicesd.py b/tests/test_classes_to_indicesd.py
index 67fac95c8c..0df7490ec5 100644
--- a/tests/test_classes_to_indicesd.py
+++ b/tests/test_classes_to_indicesd.py
@@ -11,73 +11,91 @@
 
 import unittest
 
-import numpy as np
 from parameterized import parameterized
 
 from monai.transforms import ClassesToIndicesd
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [
-    # test Argmax data
-    {"keys": "label", "num_classes": 3, "image_threshold": 0.0},
-    {"label": np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])},
-    [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7])],
-]
+TESTS_CASES = []
+for p in TEST_NDARRAYS:
+    TESTS_CASES.append(
+        [
+            # test Argmax data
+            {"keys": "label", "num_classes": 3, "image_threshold": 0.0},
+            {"label": p([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])},
+            [p([0, 4, 8]), p([1, 5, 6]), p([2, 3, 7])],
+        ]
+    )
 
-TEST_CASE_2 = [
-    {"keys": "label", "image_key": "image", "num_classes": 3, "image_threshold": 60},
-    {
-        "label": np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]),
-        "image": np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]),
-    },
-    [np.array([0, 8]), np.array([1, 5, 6]), np.array([3])],
-]
+    TESTS_CASES.append(
+        [
+            {"keys": "label", "image_key": "image", "num_classes": 3, "image_threshold": 60},
+            {
+                "label": p([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]),
+                "image": p([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]),
+            },
+            [p([0, 8]), p([1, 5, 6]), p([3])],
+        ]
+    )
 
-TEST_CASE_3 = [
-    # test One-Hot data
-    {"keys": "label", "image_threshold": 0.0},
-    {
-        "label": np.array(
-            [
-                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
-                [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
-                [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
-            ]
-        )
-    },
-    [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7])],
-]
+    TESTS_CASES.append(
+        [
+            # test One-Hot data
+            {"keys": "label", "image_threshold": 0.0},
+            {
+                "label": p(
+                    [
+                        [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                        [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
+                        [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
+                    ]
+                )
+            },
+            [p([0, 4, 8]), p([1, 5, 6]), p([2, 3, 7])],
+        ]
+    )
 
-TEST_CASE_4 = [
-    {"keys": "label", "image_key": "image", "num_classes": None, "image_threshold": 60},
-    {
-        "label": np.array(
-            [
-                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
-                [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
-                [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
-            ]
-        ),
-        "image": np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]),
-    },
-    [np.array([0, 8]), np.array([1, 5, 6]), np.array([3])],
-]
+    TESTS_CASES.append(
+        [
+            {"keys": "label", "image_key": "image", "num_classes": None, "image_threshold": 60},
+            {
+                "label": p(
+                    [
+                        [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                        [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
+                        [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
+                    ]
+                ),
+                "image": p([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]),
+            },
+            [p([0, 8]), p([1, 5, 6]), p([3])],
+        ]
+    )
 
-TEST_CASE_5 = [
-    # test output_shape
-    {"keys": "label", "indices_postfix": "cls", "num_classes": 3, "image_threshold": 0.0, "output_shape": [3, 3]},
-    {"label": np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])},
-    [np.array([[0, 0], [1, 1], [2, 2]]), np.array([[0, 1], [1, 2], [2, 0]]), np.array([[0, 2], [1, 0], [2, 1]])],
-]
+    TESTS_CASES.append(
+        [
+            # test output_shape
+            {
+                "keys": "label",
+                "indices_postfix": "cls",
+                "num_classes": 3,
+                "image_threshold": 0.0,
+                "output_shape": [3, 3],
+            },
+            {"label": p([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])},
+            [p([[0, 0], [1, 1], [2, 2]]), p([[0, 1], [1, 2], [2, 0]]), p([[0, 2], [1, 0], [2, 1]])],
+        ]
+    )
 
 
 class TestClassesToIndicesd(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5])
+    @parameterized.expand(TESTS_CASES)
     def test_value(self, input_args, input_data, expected_indices):
         result = ClassesToIndicesd(**input_args)(input_data)
         key_postfix = input_args.get("indices_postfix")
         key_postfix = "_cls_indices" if key_postfix is None else key_postfix
         for i, e in zip(result["label" + key_postfix], expected_indices):
-            np.testing.assert_allclose(i, e)
+            assert_allclose(i, e)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_convert_data_type.py b/tests/test_convert_data_type.py
index a7fc64f950..e48f6e8854 100644
--- a/tests/test_convert_data_type.py
+++ b/tests/test_convert_data_type.py
@@ -25,6 +25,10 @@
         TESTS.append((in_type(np.array(1.0)), out_type(np.array(1.0))))  # type: ignore
 
 
+class TestTensor(torch.Tensor):
+    pass
+
+
 class TestConvertDataType(unittest.TestCase):
     @parameterized.expand(TESTS)
     def test_convert_data_type(self, in_image, im_out):
@@ -49,7 +53,8 @@ def test_ill_arg(self):
 
 
 class TestConvertDataSame(unittest.TestCase):
-    @parameterized.expand(TESTS)
+    # add test for subclass of Tensor
+    @parameterized.expand(TESTS + [(np.array(1.0), TestTensor(np.array(1.0)))])
     def test_convert_data_type(self, in_image, im_out):
         converted_im, orig_type, orig_device = convert_to_dst_type(in_image, im_out)
         # check input is unchanged
@@ -57,7 +62,11 @@ def test_convert_data_type(self, in_image, im_out):
         if isinstance(in_image, torch.Tensor):
             self.assertEqual(in_image.device, orig_device)
         # check output is desired type
-        self.assertEqual(type(converted_im), type(im_out))
+        if isinstance(im_out, torch.Tensor):
+            output_type = torch.Tensor
+        else:
+            output_type = np.ndarray
+        self.assertEqual(type(converted_im), output_type)
         # check dtype is unchanged
         if isinstance(in_type, (np.ndarray, torch.Tensor)):
             self.assertEqual(converted_im.dtype, im_out.dtype)
diff --git a/tests/test_copy_model_state.py b/tests/test_copy_model_state.py
index 6330a1918a..438c521479 100644
--- a/tests/test_copy_model_state.py
+++ b/tests/test_copy_model_state.py
@@ -21,7 +21,7 @@
 
 class _TestModelOne(torch.nn.Module):
     def __init__(self, n_n, n_m, n_class):
-        super(_TestModelOne, self).__init__()
+        super().__init__()
         self.layer = torch.nn.Linear(n_n, n_m)
         self.class_layer = torch.nn.Linear(n_m, n_class)
 
@@ -33,7 +33,7 @@ def forward(self, x):
 
 class _TestModelTwo(torch.nn.Module):
     def __init__(self, n_n, n_m, n_d, n_class):
-        super(_TestModelTwo, self).__init__()
+        super().__init__()
         self.layer = torch.nn.Linear(n_n, n_m)
         self.layer_1 = torch.nn.Linear(n_m, n_d)
         self.class_layer = torch.nn.Linear(n_d, n_class)
diff --git a/tests/test_correct_crop_centers.py b/tests/test_correct_crop_centers.py
new file mode 100644
index 0000000000..853b3d41d3
--- /dev/null
+++ b/tests/test_correct_crop_centers.py
@@ -0,0 +1,39 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import torch
+from parameterized import parameterized
+
+from monai.transforms.utils import correct_crop_centers
+from tests.utils import assert_allclose
+
+TESTS = [
+    [
+        [1, 5, 0],
+        [2, 2, 2],
+        [10, 10, 10],
+    ],
+]
+
+
+class TestCorrectCropCenters(unittest.TestCase):
+    @parameterized.expand(TESTS)
+    def test_torch(self, spatial_size, centers, label_spatial_shape):
+        result1 = correct_crop_centers(centers, spatial_size, label_spatial_shape)
+        centers = [torch.tensor(i) for i in centers]
+        result2 = correct_crop_centers(centers, spatial_size, label_spatial_shape)
+        assert_allclose(result1, result2)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_create_grid_and_affine.py b/tests/test_create_grid_and_affine.py
index 0c0e52e04a..cd8d75f63e 100644
--- a/tests/test_create_grid_and_affine.py
+++ b/tests/test_create_grid_and_affine.py
@@ -12,6 +12,7 @@
 import unittest
 
 import numpy as np
+import torch
 
 from monai.transforms import (
     create_control_grid,
@@ -21,6 +22,7 @@
     create_shear,
     create_translate,
 )
+from tests.utils import assert_allclose, is_tf32_env
 
 
 class TestCreateGrid(unittest.TestCase):
@@ -32,50 +34,47 @@ def test_create_grid(self):
         with self.assertRaisesRegex(TypeError, ""):
             create_grid((1, 1), spacing=2.0)
 
-        g = create_grid((1, 1))
-        expected = np.array([[[0.0]], [[0.0]], [[1.0]]])
-        np.testing.assert_allclose(g, expected)
+        test_assert(create_grid, ((1, 1),), np.array([[[0.0]], [[0.0]], [[1.0]]]))
 
-        g = create_grid((1, 1), homogeneous=False)
-        expected = np.array([[[0.0]], [[0.0]]])
-        np.testing.assert_allclose(g, expected)
+        test_assert(create_grid, ((1, 1), None, False), np.array([[[0.0]], [[0.0]]]))
 
-        g = create_grid((1, 1), spacing=(1.2, 1.3))
-        expected = np.array([[[0.0]], [[0.0]], [[1.0]]])
-        np.testing.assert_allclose(g, expected)
+        test_assert(create_grid, ((1, 1), (1.2, 1.3)), np.array([[[0.0]], [[0.0]], [[1.0]]]))
 
-        g = create_grid((1, 1, 1), spacing=(1.2, 1.3, 1.0))
-        expected = np.array([[[[0.0]]], [[[0.0]]], [[[0.0]]], [[[1.0]]]])
-        np.testing.assert_allclose(g, expected)
+        test_assert(create_grid, ((1, 1, 1), (1.2, 1.3, 1.0)), np.array([[[[0.0]]], [[[0.0]]], [[[0.0]]], [[[1.0]]]]))
 
-        g = create_grid((1, 1, 1), spacing=(1.2, 1.3, 1.0), homogeneous=False)
-        expected = np.array([[[[0.0]]], [[[0.0]]], [[[0.0]]]])
-        np.testing.assert_allclose(g, expected)
+        test_assert(create_grid, ((1, 1, 1), (1.2, 1.3, 1.0), False), np.array([[[[0.0]]], [[[0.0]]], [[[0.0]]]]))
 
         g = create_grid((1, 1, 1), spacing=(1.2, 1.3, 1.0), dtype=np.int32)
         np.testing.assert_equal(g.dtype, np.int32)
 
-        g = create_grid((2, 2, 2))
-        expected = np.array(
-            [
-                [[[-0.5, -0.5], [-0.5, -0.5]], [[0.5, 0.5], [0.5, 0.5]]],
-                [[[-0.5, -0.5], [0.5, 0.5]], [[-0.5, -0.5], [0.5, 0.5]]],
-                [[[-0.5, 0.5], [-0.5, 0.5]], [[-0.5, 0.5], [-0.5, 0.5]]],
-                [[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]],
-            ]
+        g = create_grid((1, 1, 1), spacing=(1.2, 1.3, 1.0), dtype=torch.float64, backend="torch")
+        np.testing.assert_equal(g.dtype, torch.float64)
+
+        test_assert(
+            create_grid,
+            ((2, 2, 2),),
+            np.array(
+                [
+                    [[[-0.5, -0.5], [-0.5, -0.5]], [[0.5, 0.5], [0.5, 0.5]]],
+                    [[[-0.5, -0.5], [0.5, 0.5]], [[-0.5, -0.5], [0.5, 0.5]]],
+                    [[[-0.5, 0.5], [-0.5, 0.5]], [[-0.5, 0.5], [-0.5, 0.5]]],
+                    [[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]],
+                ]
+            ),
         )
-        np.testing.assert_allclose(g, expected)
 
-        g = create_grid((2, 2, 2), spacing=(1.2, 1.3, 1.0))
-        expected = np.array(
-            [
-                [[[-0.6, -0.6], [-0.6, -0.6]], [[0.6, 0.6], [0.6, 0.6]]],
-                [[[-0.65, -0.65], [0.65, 0.65]], [[-0.65, -0.65], [0.65, 0.65]]],
-                [[[-0.5, 0.5], [-0.5, 0.5]], [[-0.5, 0.5], [-0.5, 0.5]]],
-                [[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]],
-            ]
+        test_assert(
+            create_grid,
+            ((2, 2, 2), (1.2, 1.3, 1.0)),
+            np.array(
+                [
+                    [[[-0.6, -0.6], [-0.6, -0.6]], [[0.6, 0.6], [0.6, 0.6]]],
+                    [[[-0.65, -0.65], [0.65, 0.65]], [[-0.65, -0.65], [0.65, 0.65]]],
+                    [[[-0.5, 0.5], [-0.5, 0.5]], [[-0.5, 0.5], [-0.5, 0.5]]],
+                    [[[1.0, 1.0], [1.0, 1.0]], [[1.0, 1.0], [1.0, 1.0]]],
+                ]
+            ),
         )
-        np.testing.assert_allclose(g, expected)
 
     def test_create_control_grid(self):
         with self.assertRaisesRegex(TypeError, ""):
@@ -83,72 +82,87 @@ def test_create_control_grid(self):
         with self.assertRaisesRegex(TypeError, ""):
             create_control_grid((1, 1), 2.0)
 
-        g = create_control_grid((1.0, 1.0), (1.0, 1.0))
-        expected = np.array(
-            [
-                [[-1.0, -1.0, -1.0], [0.0, 0.0, 0.0], [1.0, 1.0, 1.0]],
-                [[-1.0, 0.0, 1.0], [-1.0, 0.0, 1.0], [-1.0, 0.0, 1.0]],
-                [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
-            ]
+        test_assert(
+            create_control_grid,
+            ((1.0, 1.0), (1.0, 1.0)),
+            np.array(
+                [
+                    [[-1.0, -1.0, -1.0], [0.0, 0.0, 0.0], [1.0, 1.0, 1.0]],
+                    [[-1.0, 0.0, 1.0], [-1.0, 0.0, 1.0], [-1.0, 0.0, 1.0]],
+                    [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
+                ]
+            ),
         )
-        np.testing.assert_allclose(g, expected)
 
-        g = create_control_grid((1.0, 1.0), (2.0, 2.0))
-        expected = np.array(
-            [
-                [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]],
-                [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]],
-                [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
-            ]
+        test_assert(
+            create_control_grid,
+            ((1.0, 1.0), (2.0, 2.0)),
+            np.array(
+                [
+                    [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]],
+                    [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]],
+                    [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
+                ]
+            ),
         )
-        np.testing.assert_allclose(g, expected)
 
-        g = create_control_grid((2.0, 2.0), (1.0, 1.0))
-        expected = np.array(
-            [
-                [[-1.5, -1.5, -1.5, -1.5], [-0.5, -0.5, -0.5, -0.5], [0.5, 0.5, 0.5, 0.5], [1.5, 1.5, 1.5, 1.5]],
-                [[-1.5, -0.5, 0.5, 1.5], [-1.5, -0.5, 0.5, 1.5], [-1.5, -0.5, 0.5, 1.5], [-1.5, -0.5, 0.5, 1.5]],
-                [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]],
-            ]
+        test_assert(
+            create_control_grid,
+            ((2.0, 2.0), (1.0, 1.0)),
+            np.array(
+                [
+                    [[-1.5, -1.5, -1.5, -1.5], [-0.5, -0.5, -0.5, -0.5], [0.5, 0.5, 0.5, 0.5], [1.5, 1.5, 1.5, 1.5]],
+                    [[-1.5, -0.5, 0.5, 1.5], [-1.5, -0.5, 0.5, 1.5], [-1.5, -0.5, 0.5, 1.5], [-1.5, -0.5, 0.5, 1.5]],
+                    [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]],
+                ]
+            ),
         )
-        np.testing.assert_allclose(g, expected)
 
-        g = create_control_grid((2.0, 2.0), (2.0, 2.0))
-        expected = np.array(
-            [
-                [[-3.0, -3.0, -3.0, -3.0], [-1.0, -1.0, -1.0, -1.0], [1.0, 1.0, 1.0, 1.0], [3.0, 3.0, 3.0, 3.0]],
-                [[-3.0, -1.0, 1.0, 3.0], [-3.0, -1.0, 1.0, 3.0], [-3.0, -1.0, 1.0, 3.0], [-3.0, -1.0, 1.0, 3.0]],
-                [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]],
-            ]
+        test_assert(
+            create_control_grid,
+            ((2.0, 2.0), (2.0, 2.0)),
+            np.array(
+                [
+                    [[-3.0, -3.0, -3.0, -3.0], [-1.0, -1.0, -1.0, -1.0], [1.0, 1.0, 1.0, 1.0], [3.0, 3.0, 3.0, 3.0]],
+                    [[-3.0, -1.0, 1.0, 3.0], [-3.0, -1.0, 1.0, 3.0], [-3.0, -1.0, 1.0, 3.0], [-3.0, -1.0, 1.0, 3.0]],
+                    [[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]],
+                ]
+            ),
         )
-        np.testing.assert_allclose(g, expected)
 
-        g = create_control_grid((1.0, 1.0, 1.0), (2.0, 2.0, 2.0), homogeneous=False)
-        expected = np.array(
-            [
-                [
-                    [[-2.0, -2.0, -2.0], [-2.0, -2.0, -2.0], [-2.0, -2.0, -2.0]],
-                    [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
-                    [[2.0, 2.0, 2.0], [2.0, 2.0, 2.0], [2.0, 2.0, 2.0]],
-                ],
-                [
-                    [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]],
-                    [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]],
-                    [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]],
-                ],
+        test_assert(
+            create_control_grid,
+            ((1.0, 1.0, 1.0), (2.0, 2.0, 2.0), False),
+            np.array(
                 [
-                    [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]],
-                    [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]],
-                    [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]],
-                ],
-            ]
+                    [
+                        [[-2.0, -2.0, -2.0], [-2.0, -2.0, -2.0], [-2.0, -2.0, -2.0]],
+                        [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]],
+                        [[2.0, 2.0, 2.0], [2.0, 2.0, 2.0], [2.0, 2.0, 2.0]],
+                    ],
+                    [
+                        [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]],
+                        [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]],
+                        [[-2.0, -2.0, -2.0], [0.0, 0.0, 0.0], [2.0, 2.0, 2.0]],
+                    ],
+                    [
+                        [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]],
+                        [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]],
+                        [[-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0], [-2.0, 0.0, 2.0]],
+                    ],
+                ]
+            ),
         )
-        np.testing.assert_allclose(g, expected)
 
 
 def test_assert(func, params, expected):
-    m = func(*params)
-    np.testing.assert_allclose(m, expected, atol=1e-7)
+    gpu_test = ("torch_gpu",) if torch.cuda.is_available() else ()
+    for b in ("torch", "numpy") + gpu_test:
+        if b == "torch_gpu":
+            m = func(*params, device="cuda:0", backend="torch")
+        else:
+            m = func(*params, backend=b)
+        assert_allclose(m, expected, type_test=False, rtol=1e-2 if is_tf32_env() else 1e-5, atol=1e-5)
 
 
 class TestCreateAffine(unittest.TestCase):
diff --git a/tests/test_crop_foreground.py b/tests/test_crop_foreground.py
index 71e488cac8..0bae1f90f3 100644
--- a/tests/test_crop_foreground.py
+++ b/tests/test_crop_foreground.py
@@ -12,60 +12,79 @@
 import unittest
 
 import numpy as np
+import torch
 from parameterized import parameterized
 
 from monai.transforms import CropForeground
+from tests.utils import TEST_NDARRAYS
 
-TEST_CASE_1 = [
-    {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 0},
-    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]),
-    np.array([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]]),
-]
-
-TEST_CASE_2 = [
-    {"select_fn": lambda x: x > 1, "channel_indices": None, "margin": 0},
-    np.array([[[0, 0, 0, 0, 0], [0, 1, 1, 1, 0], [0, 1, 3, 1, 0], [0, 1, 1, 1, 0], [0, 0, 0, 0, 0]]]),
-    np.array([[[3]]]),
-]
-
-TEST_CASE_3 = [
-    {"select_fn": lambda x: x > 0, "channel_indices": 0, "margin": 0},
-    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]),
-    np.array([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]]),
-]
-
-TEST_CASE_4 = [
-    {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 1},
-    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]),
-    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0]]]),
-]
-
-TEST_CASE_5 = [
-    {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": [2, 1]},
-    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]),
-    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]),
-]
-
-TEST_CASE_6 = [
-    {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 0, "k_divisible": 4},
-    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]),
-    np.array([[[1, 2, 1, 0], [2, 3, 2, 0], [1, 2, 1, 0], [0, 0, 0, 0]]]),
-]
-
-TEST_CASE_7 = [
-    {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 0, "k_divisible": 10, "constant_values": 2},
-    np.array([[[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]),
-    np.zeros((1, 0, 0)),
-]
+TEST_COORDS, TESTS = [], []
+
+for p in TEST_NDARRAYS:
+    TEST_COORDS.append(
+        [
+            {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 0},
+            p([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]),
+            p([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]]),
+        ]
+    )
+
+    TESTS.append(
+        [
+            {"select_fn": lambda x: x > 1, "channel_indices": None, "margin": 0},
+            p([[[0, 0, 0, 0, 0], [0, 1, 1, 1, 0], [0, 1, 3, 1, 0], [0, 1, 1, 1, 0], [0, 0, 0, 0, 0]]]),
+            p([[[3]]]),
+        ]
+    )
+
+    TESTS.append(
+        [
+            {"select_fn": lambda x: x > 0, "channel_indices": 0, "margin": 0},
+            p([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]),
+            p([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]]),
+        ]
+    )
+
+    TESTS.append(
+        [
+            {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 1},
+            p([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]),
+            p([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0]]]),
+        ]
+    )
+
+    TESTS.append(
+        [
+            {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": [2, 1]},
+            p([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]),
+            p([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]),
+        ]
+    )
+
+    TESTS.append(
+        [
+            {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 0, "k_divisible": 4},
+            p([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]),
+            p([[[1, 2, 1, 0], [2, 3, 2, 0], [1, 2, 1, 0], [0, 0, 0, 0]]]),
+        ]
+    )
+
+    TESTS.append(
+        [
+            {"select_fn": lambda x: x > 0, "channel_indices": None, "margin": 0, "k_divisible": 10},
+            p([[[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]),
+            p(np.zeros((1, 0, 0), dtype=np.int64)),
+        ]
+    )
 
 
 class TestCropForeground(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6, TEST_CASE_7])
+    @parameterized.expand(TEST_COORDS + TESTS)
     def test_value(self, argments, image, expected_data):
         result = CropForeground(**argments)(image)
-        np.testing.assert_allclose(result, expected_data)
+        torch.testing.assert_allclose(result, expected_data, rtol=1e-7, atol=0)
 
-    @parameterized.expand([TEST_CASE_1])
+    @parameterized.expand(TEST_COORDS)
     def test_return_coords(self, argments, image, _):
         argments["return_coords"] = True
         _, start_coord, end_coord = CropForeground(**argments)(image)
diff --git a/tests/test_crop_foregroundd.py b/tests/test_crop_foregroundd.py
index efe6b65b4b..5fa474d6ac 100644
--- a/tests/test_crop_foregroundd.py
+++ b/tests/test_crop_foregroundd.py
@@ -12,85 +12,128 @@
 import unittest
 
 import numpy as np
+import torch
 from parameterized import parameterized
 
 from monai.transforms import CropForegroundd
-from monai.utils import NumpyPadMode
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [
-    {
-        "keys": ["img", "label"],
-        "source_key": "label",
-        "select_fn": lambda x: x > 0,
-        "channel_indices": None,
-        "margin": 0,
-        "mode": "constant",
-        "constant_values": 2,
-    },
-    {
-        "img": np.array([[[1, 0, 2, 0, 1], [0, 1, 2, 1, 0], [2, 2, 3, 2, 2], [0, 1, 2, 1, 0], [1, 0, 2, 0, 1]]]),
-        "label": np.array([[[0, 0, 0, 0, 0], [0, 1, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 1, 0], [0, 0, 0, 0, 0]]]),
-    },
-    np.array([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]]),
-]
+TEST_POSITION, TESTS = [], []
+for p in TEST_NDARRAYS:
 
-TEST_CASE_2 = [
-    {"keys": ["img"], "source_key": "img", "select_fn": lambda x: x > 1, "channel_indices": None, "margin": 0},
-    {"img": np.array([[[0, 0, 0, 0, 0], [0, 1, 1, 1, 0], [0, 1, 3, 1, 0], [0, 1, 1, 1, 0], [0, 0, 0, 0, 0]]])},
-    np.array([[[3]]]),
-]
-
-TEST_CASE_3 = [
-    {"keys": ["img"], "source_key": "img", "select_fn": lambda x: x > 0, "channel_indices": 0, "margin": 0},
-    {"img": np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]])},
-    np.array([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]]),
-]
-
-TEST_CASE_4 = [
-    {"keys": ["img"], "source_key": "img", "select_fn": lambda x: x > 0, "channel_indices": None, "margin": 1},
-    {"img": np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]])},
-    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0]]]),
-]
-
-TEST_CASE_5 = [
-    {"keys": ["img"], "source_key": "img", "select_fn": lambda x: x > 0, "channel_indices": None, "margin": [2, 1]},
-    {"img": np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]])},
-    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]),
-]
-
-TEST_CASE_6 = [
-    {
-        "keys": ["img", "seg"],
-        "source_key": "img",
-        "select_fn": lambda x: x > 0,
-        "channel_indices": 0,
-        "margin": 0,
-        "k_divisible": [4, 6],
-        "mode": ["edge", NumpyPadMode.CONSTANT],
-    },
-    {
-        "img": np.array([[[0, 2, 1, 2, 0], [1, 1, 2, 1, 1], [2, 2, 3, 2, 2], [1, 1, 2, 1, 1], [0, 0, 0, 0, 0]]]),
-        "seg": np.array([[[0, 2, 1, 2, 0], [1, 1, 2, 1, 1], [2, 2, 3, 2, 2], [1, 1, 2, 1, 1], [0, 0, 0, 0, 0]]]),
-    },
-    np.array([[[0, 2, 1, 2, 0, 0], [1, 1, 2, 1, 1, 1], [2, 2, 3, 2, 2, 2], [1, 1, 2, 1, 1, 1]]]),
-]
+    TEST_POSITION.append(
+        [
+            {
+                "keys": ["img", "label"],
+                "source_key": "label",
+                "select_fn": lambda x: x > 0,
+                "channel_indices": None,
+                "margin": 0,
+            },
+            {
+                "img": p(
+                    np.array([[[1, 0, 2, 0, 1], [0, 1, 2, 1, 0], [2, 2, 3, 2, 2], [0, 1, 2, 1, 0], [1, 0, 2, 0, 1]]])
+                ),
+                "label": p(
+                    np.array([[[0, 0, 0, 0, 0], [0, 1, 0, 1, 0], [0, 0, 1, 0, 0], [0, 1, 0, 1, 0], [0, 0, 0, 0, 0]]])
+                ),
+            },
+            p(np.array([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]])),
+        ]
+    )
+    TESTS.append(
+        [
+            {"keys": ["img"], "source_key": "img", "select_fn": lambda x: x > 1, "channel_indices": None, "margin": 0},
+            {
+                "img": p(
+                    np.array([[[0, 0, 0, 0, 0], [0, 1, 1, 1, 0], [0, 1, 3, 1, 0], [0, 1, 1, 1, 0], [0, 0, 0, 0, 0]]])
+                )
+            },
+            p(np.array([[[3]]])),
+        ]
+    )
+    TESTS.append(
+        [
+            {"keys": ["img"], "source_key": "img", "select_fn": lambda x: x > 0, "channel_indices": 0, "margin": 0},
+            {
+                "img": p(
+                    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]])
+                )
+            },
+            p(np.array([[[1, 2, 1], [2, 3, 2], [1, 2, 1]]])),
+        ]
+    )
+    TESTS.append(
+        [
+            {"keys": ["img"], "source_key": "img", "select_fn": lambda x: x > 0, "channel_indices": None, "margin": 1},
+            {
+                "img": p(
+                    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]])
+                )
+            },
+            p(np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0]]])),
+        ]
+    )
+    TESTS.append(
+        [
+            {
+                "keys": ["img"],
+                "source_key": "img",
+                "select_fn": lambda x: x > 0,
+                "channel_indices": None,
+                "margin": [2, 1],
+            },
+            {
+                "img": p(
+                    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]])
+                )
+            },
+            p(np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]])),
+        ]
+    )
+    TESTS.append(
+        [
+            {
+                "keys": ["img"],
+                "source_key": "img",
+                "select_fn": lambda x: x > 0,
+                "channel_indices": 0,
+                "margin": 0,
+                "k_divisible": [4, 6],
+                "mode": "edge",
+            },
+            {
+                "img": p(
+                    np.array(
+                        [[[0, 2, 1, 2, 0], [1, 1, 2, 1, 1], [2, 2, 3, 2, 2], [1, 1, 2, 1, 1], [0, 0, 0, 0, 0]]],
+                        dtype=np.float32,
+                    )
+                )
+            },
+            p(np.array([[[0, 2, 1, 2, 0, 0], [1, 1, 2, 1, 1, 1], [2, 2, 3, 2, 2, 2], [1, 1, 2, 1, 1, 1]]])),
+        ]
+    )
 
 
 class TestCropForegroundd(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6])
-    def test_value(self, argments, image, expected_data):
-        result = CropForegroundd(**argments)(image)
-        np.testing.assert_allclose(result["img"], expected_data)
+    @parameterized.expand(TEST_POSITION + TESTS)
+    def test_value(self, argments, input_data, expected_data):
+        result = CropForegroundd(**argments)(input_data)
+        r, i = result["img"], input_data["img"]
+        self.assertEqual(type(r), type(i))
+        if isinstance(r, torch.Tensor):
+            self.assertEqual(r.device, i.device)
+        assert_allclose(r, expected_data)
 
-    @parameterized.expand([TEST_CASE_1])
-    def test_foreground_position(self, argments, image, _):
-        result = CropForegroundd(**argments)(image)
+    @parameterized.expand(TEST_POSITION)
+    def test_foreground_position(self, argments, input_data, _):
+        result = CropForegroundd(**argments)(input_data)
         np.testing.assert_allclose(result["foreground_start_coord"], np.array([1, 1]))
         np.testing.assert_allclose(result["foreground_end_coord"], np.array([4, 4]))
 
         argments["start_coord_key"] = "test_start_coord"
         argments["end_coord_key"] = "test_end_coord"
-        result = CropForegroundd(**argments)(image)
+        result = CropForegroundd(**argments)(input_data)
         np.testing.assert_allclose(result["test_start_coord"], np.array([1, 1]))
         np.testing.assert_allclose(result["test_end_coord"], np.array([4, 4]))
 
diff --git a/tests/test_csv_saver.py b/tests/test_csv_saver.py
index 6dd0159322..a279599463 100644
--- a/tests/test_csv_saver.py
+++ b/tests/test_csv_saver.py
@@ -29,7 +29,7 @@ def test_saved_content(self):
             saver.finalize()
             filepath = os.path.join(tempdir, "predictions.csv")
             self.assertTrue(os.path.exists(filepath))
-            with open(filepath, "r") as f:
+            with open(filepath) as f:
                 reader = csv.reader(f)
                 i = 0
                 for row in reader:
diff --git a/tests/test_cucim_dict_transform.py b/tests/test_cucim_dict_transform.py
new file mode 100644
index 0000000000..4936375142
--- /dev/null
+++ b/tests/test_cucim_dict_transform.py
@@ -0,0 +1,141 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+from parameterized import parameterized
+
+from monai.transforms import CuCIMd
+from monai.utils import optional_import, set_determinism
+from tests.utils import skip_if_no_cuda
+
+_, has_cut = optional_import("cucim.core.operations.expose.transform")
+cp, has_cp = optional_import("cupy")
+
+set_determinism(seed=0)
+
+TEST_CASE_COLOR_JITTER_1 = [
+    {"name": "color_jitter", "brightness": 0.0, "contrast": 0.0, "saturation": 0.0, "hue": 0.0},
+    np.array([[[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]]], dtype=np.float32),
+    np.array([[[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]]], dtype=np.float32),
+]
+
+TEST_CASE_COLOR_JITTER_2 = [
+    {"name": "color_jitter", "brightness": 0.0, "contrast": 0.0, "saturation": 0.0, "hue": 0.0},
+    np.array([[[0, 1], [2, 3]], [[0, 1], [2, 3]], [[0, 1], [2, 3]]], dtype=np.uint8),
+    np.array([[[0, 1], [2, 3]], [[0, 1], [2, 3]], [[0, 1], [2, 3]]], dtype=np.uint8),
+]
+
+TEST_CASE_FLIP_1 = [
+    {"name": "image_flip", "spatial_axis": -1},
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+    np.array([[[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]]], dtype=np.float32),
+]
+
+
+TEST_CASE_ROTATE_1 = [
+    {"name": "image_rotate_90", "k": 1, "spatial_axis": (-2, -1)},
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+    np.array([[[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]]], dtype=np.float32),
+]
+
+TEST_CASE_SCALE_INTENSITY_1 = [
+    {"name": "scale_intensity_range", "a_min": 0.0, "a_max": 4.0, "b_min": 0.0, "b_max": 1.0, "clip": False},
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+    np.array([[[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]]], dtype=np.float32),
+]
+
+TEST_CASE_ZOOM_1 = [
+    {"name": "zoom", "zoom_factor": (0.5, 0.5)},
+    np.mgrid[:3, 1:4].astype(dtype=np.float32),
+    np.concatenate([np.ones((1, 3, 3), dtype=np.float32) * 1.0, np.ones((1, 3, 3), dtype=np.float32) * 2.0]),
+]
+
+
+@skip_if_no_cuda
+@unittest.skipUnless(has_cp, "CuPy is required.")
+@unittest.skipUnless(has_cut, "cuCIM transforms are required.")
+class TestCuCIMDict(unittest.TestCase):
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_COLOR_JITTER_2,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_ROTATE_1,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+        ]
+    )
+    def test_tramsforms_numpy_single(self, params, input, expected):
+        input = {"image": input}
+        output = CuCIMd(keys="image", **params)(input)["image"]
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, expected)
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_COLOR_JITTER_2,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_ROTATE_1,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+        ]
+    )
+    def test_tramsforms_numpy_batch(self, params, input, expected):
+        input = {"image": input[cp.newaxis, ...]}
+        expected = expected[cp.newaxis, ...]
+        output = CuCIMd(keys="image", **params)(input)["image"]
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, expected)
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_COLOR_JITTER_2,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_ROTATE_1,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+        ]
+    )
+    def test_tramsforms_cupy_single(self, params, input, expected):
+        input = {"image": cp.asarray(input)}
+        expected = cp.asarray(expected)
+        output = CuCIMd(keys="image", **params)(input)["image"]
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, expected)
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_COLOR_JITTER_2,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_ROTATE_1,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+        ]
+    )
+    def test_tramsforms_cupy_batch(self, params, input, expected):
+        input = {"image": cp.asarray(input)[cp.newaxis, ...]}
+        expected = cp.asarray(expected)[cp.newaxis, ...]
+        output = CuCIMd(keys="image", **params)(input)["image"]
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, expected)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_cucim_transform.py b/tests/test_cucim_transform.py
new file mode 100644
index 0000000000..a6c0084c99
--- /dev/null
+++ b/tests/test_cucim_transform.py
@@ -0,0 +1,140 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+from parameterized import parameterized
+
+from monai.transforms import CuCIM
+from monai.utils import optional_import, set_determinism
+from tests.utils import skip_if_no_cuda
+
+_, has_cut = optional_import("cucim.core.operations.expose.transform")
+cp, has_cp = optional_import("cupy")
+
+set_determinism(seed=0)
+
+TEST_CASE_COLOR_JITTER_1 = [
+    {"name": "color_jitter", "brightness": 0.0, "contrast": 0.0, "saturation": 0.0, "hue": 0.0},
+    np.array([[[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]]], dtype=np.float32),
+    np.array([[[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]], [[0.0, 1.0], [1.0, 2.0]]], dtype=np.float32),
+]
+
+TEST_CASE_COLOR_JITTER_2 = [
+    {"name": "color_jitter", "brightness": 0.0, "contrast": 0.0, "saturation": 0.0, "hue": 0.0},
+    np.array([[[0, 1], [2, 3]], [[0, 1], [2, 3]], [[0, 1], [2, 3]]], dtype=np.uint8),
+    np.array([[[0, 1], [2, 3]], [[0, 1], [2, 3]], [[0, 1], [2, 3]]], dtype=np.uint8),
+]
+
+TEST_CASE_FLIP_1 = [
+    {"name": "image_flip", "spatial_axis": -1},
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+    np.array([[[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]]], dtype=np.float32),
+]
+
+
+TEST_CASE_ROTATE_1 = [
+    {"name": "image_rotate_90", "k": 1, "spatial_axis": (-2, -1)},
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+    np.array([[[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]]], dtype=np.float32),
+]
+
+TEST_CASE_SCALE_INTENSITY_1 = [
+    {"name": "scale_intensity_range", "a_min": 0.0, "a_max": 4.0, "b_min": 0.0, "b_max": 1.0, "clip": False},
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+    np.array([[[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]]], dtype=np.float32),
+]
+
+TEST_CASE_ZOOM_1 = [
+    {"name": "zoom", "zoom_factor": (0.5, 0.5)},
+    np.mgrid[:3, 1:4].astype(dtype=np.float32),
+    np.concatenate([np.ones((1, 3, 3), dtype=np.float32) * 1.0, np.ones((1, 3, 3), dtype=np.float32) * 2.0]),
+]
+
+
+@skip_if_no_cuda
+@unittest.skipUnless(has_cp, "CuPy is required.")
+@unittest.skipUnless(has_cut, "cuCIM transforms are required.")
+class TestCuCIM(unittest.TestCase):
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_COLOR_JITTER_2,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_ROTATE_1,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+        ]
+    )
+    def test_tramsforms_numpy_single(self, params, input, expected):
+        output = CuCIM(**params)(input)
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, expected)
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_COLOR_JITTER_2,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_ROTATE_1,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+        ]
+    )
+    def test_tramsforms_numpy_batch(self, params, input, expected):
+        input = input[cp.newaxis, ...]
+        expected = expected[cp.newaxis, ...]
+        output = CuCIM(**params)(input)
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, expected)
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_COLOR_JITTER_2,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_ROTATE_1,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+        ]
+    )
+    def test_tramsforms_cupy_single(self, params, input, expected):
+        input = cp.asarray(input)
+        expected = cp.asarray(expected)
+        output = CuCIM(**params)(input)
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, expected)
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_COLOR_JITTER_2,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_ROTATE_1,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+        ]
+    )
+    def test_tramsforms_cupy_batch(self, params, input, expected):
+        input = cp.asarray(input)[cp.newaxis, ...]
+        expected = cp.asarray(expected)[cp.newaxis, ...]
+        output = CuCIM(**params)(input)
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, expected)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_cuimage_reader.py b/tests/test_cuimage_reader.py
index 2cbfaec113..34df4b5fe4 100644
--- a/tests/test_cuimage_reader.py
+++ b/tests/test_cuimage_reader.py
@@ -21,10 +21,11 @@
 from monai.data.image_reader import WSIReader
 from monai.utils import optional_import
 
-_, has_cim = optional_import("cucim")
+cucim, has_cucim = optional_import("cucim")
+has_cucim = has_cucim and hasattr(cucim, "CuImage")
 PILImage, has_pil = optional_import("PIL.Image")
 
-FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff"
+FILE_URL = "https://drive.google.com/uc?id=1sGTKZlJBIz53pfqTxoTqiIQzIoEzHLAe"
 FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + os.path.basename(FILE_URL))
 
 HEIGHT = 32914
@@ -83,7 +84,7 @@
 
 
 class TestCuCIMReader(unittest.TestCase):
-    @skipUnless(has_cim, "Requires CuCIM")
+    @skipUnless(has_cucim, "Requires CuCIM")
     def setUp(self):
         download_url(FILE_URL, FILE_PATH, "5a3cfd4fd725c50578ddb80b517b759f")
 
@@ -112,6 +113,7 @@ def test_read_patches(self, file_path, patch_info, expected_img):
 
     @parameterized.expand([TEST_CASE_RGB_0, TEST_CASE_RGB_1])
     @skipUnless(has_pil, "Requires PIL")
+    @skipUnless(has_cucim and cucim.__version__ == "0.19.0", "Skipped for cicum>0.19.0")
     def test_read_rgba(self, img_expected):
         image = {}
         reader = WSIReader("cuCIM")
diff --git a/tests/test_data_stats.py b/tests/test_data_stats.py
index 50536f2a5c..535b28bcf1 100644
--- a/tests/test_data_stats.py
+++ b/tests/test_data_stats.py
@@ -11,6 +11,7 @@
 
 import logging
 import os
+import sys
 import tempfile
 import unittest
 
@@ -126,7 +127,7 @@
 
 TEST_CASE_8 = [
     np.array([[0, 1], [1, 2]]),
-    "test data statistics:\nType: <class 'numpy.ndarray'>\nShape: (2, 2)\nValue range: (0, 2)\n"
+    "test data statistics:\nType: <class 'numpy.ndarray'> int64\nShape: (2, 2)\nValue range: (0, 2)\n"
     "Value: [[0 1]\n [1 2]]\nAdditional info: 1.0\n",
 ]
 
@@ -159,9 +160,10 @@ def test_file(self, input_data, expected_print):
             for h in _logger.handlers[:]:
                 h.close()
                 _logger.removeHandler(h)
-            with open(filename, "r") as f:
+            with open(filename) as f:
                 content = f.read()
-            self.assertEqual(content, expected_print)
+            if sys.platform != "win32":
+                self.assertEqual(content, expected_print)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_data_statsd.py b/tests/test_data_statsd.py
index aea0f1e721..686d23c4f9 100644
--- a/tests/test_data_statsd.py
+++ b/tests/test_data_statsd.py
@@ -11,6 +11,7 @@
 
 import logging
 import os
+import sys
 import tempfile
 import unittest
 
@@ -147,7 +148,7 @@
 
 TEST_CASE_9 = [
     {"img": np.array([[0, 1], [1, 2]])},
-    "test data statistics:\nType: <class 'numpy.ndarray'>\nShape: (2, 2)\nValue range: (0, 2)\n"
+    "test data statistics:\nType: <class 'numpy.ndarray'> int64\nShape: (2, 2)\nValue range: (0, 2)\n"
     "Value: [[0 1]\n [1 2]]\nAdditional info: 1.0\n",
 ]
 
@@ -192,9 +193,10 @@ def test_file(self, input_data, expected_print):
                 h.close()
                 _logger.removeHandler(h)
             del handler
-            with open(filename, "r") as f:
+            with open(filename) as f:
                 content = f.read()
-            self.assertEqual(content, expected_print)
+            if sys.platform != "win32":
+                self.assertEqual(content, expected_print)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_delete_itemsd.py b/tests/test_delete_itemsd.py
index 7426e39ff0..b7cd104c46 100644
--- a/tests/test_delete_itemsd.py
+++ b/tests/test_delete_itemsd.py
@@ -19,19 +19,36 @@
 
 TEST_CASE_1 = [{"keys": [str(i) for i in range(30)]}, 20]
 
+TEST_CASE_2 = [{"keys": ["image/" + str(i) for i in range(30)], "sep": "/"}, 20]
+
+TEST_CASE_3 = [{"keys": "meta_dict%0008\\|[0-9]", "sep": "%", "use_re": True}]
+
 
 class TestDeleteItemsd(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1])
+    @parameterized.expand([TEST_CASE_1, TEST_CASE_2])
     def test_memory(self, input_param, expected_key_size):
-        input_data = {}
+        input_data = {"image": {}} if "sep" in input_param else {}
         for i in range(50):
-            input_data[str(i)] = [time.time()] * 100000
+            if "sep" in input_param:
+                input_data["image"][str(i)] = [time.time()] * 100000
+            else:
+                input_data[str(i)] = [time.time()] * 100000
         result = DeleteItemsd(**input_param)(input_data)
-        self.assertEqual(len(result.keys()), expected_key_size)
+        if "sep" in input_param:
+            self.assertEqual(len(result["image"].keys()), expected_key_size)
+        else:
+            self.assertEqual(len(result.keys()), expected_key_size)
         self.assertGreaterEqual(
             sys.getsizeof(input_data) * float(expected_key_size) / len(input_data), sys.getsizeof(result)
         )
 
+    @parameterized.expand([TEST_CASE_3])
+    def test_re(self, input_param):
+        input_data = {"image": [1, 2, 3], "meta_dict": {"0008|0005": 1, "0008|1050": 2, "0008test": 3}}
+        result = DeleteItemsd(**input_param)(input_data)
+        self.assertEqual(result["meta_dict"]["0008test"], 3)
+        self.assertTrue(len(result["meta_dict"]), 1)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_deprecated.py b/tests/test_deprecated.py
index 429d5ee767..04f8173282 100644
--- a/tests/test_deprecated.py
+++ b/tests/test_deprecated.py
@@ -222,3 +222,31 @@ def future1():
             warnings.warn("fake warning", DeprecationWarning)
 
         self.assertEqual(aw.warning.args[0], "fake warning")
+
+    def test_arg_except2_unknown(self):
+        """
+        Test deprecated_arg decorator raises exception with `removed` set in the past.
+        with unknown version
+        """
+
+        @deprecated_arg("b", removed=self.prev_version, version_val="0+untagged.1.g3131155")
+        def afoo4(a, b=None):
+            pass
+
+        self.assertRaises(DeprecatedError, lambda: afoo4(1, b=2))
+
+    def test_replacement_arg(self):
+        """
+        Test deprecated arg being replaced.
+        """
+
+        @deprecated_arg("b", new_name="a", since=self.prev_version, version_val=self.test_version)
+        def afoo4(a, b=None):
+            return a
+
+        self.assertEqual(afoo4(b=2), 2)
+        # self.assertRaises(DeprecatedError, lambda: afoo4(1, b=2))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_divisible_pad.py b/tests/test_divisible_pad.py
index ca15b4b347..810d08252c 100644
--- a/tests/test_divisible_pad.py
+++ b/tests/test_divisible_pad.py
@@ -50,11 +50,13 @@ def test_pad_shape(self, input_param, input_data, expected_val):
         self.assertAlmostEqual(result.shape, expected_val.shape)
 
     def test_pad_kwargs(self):
-        padder = DivisiblePad(k=5, mode="constant", constant_values=((0, 0), (1, 1), (2, 2)))
         for p in TEST_NDARRAYS:
-            result = padder(p(np.zeros((3, 8, 4))))
-            result = result.cpu() if isinstance(result, torch.Tensor) else result
-            torch.testing.assert_allclose(result[:, :1, :4], np.ones((3, 1, 4)), rtol=1e-7, atol=0)
+            input_data = p(np.zeros((3, 8, 4)))
+            if isinstance(input_data, np.ndarray):
+                result = DivisiblePad(k=5, mode="constant", constant_values=((0, 0), (1, 1), (2, 2)))(input_data)
+                np.testing.assert_allclose(result[:, :1, :4], np.ones((3, 1, 4)), rtol=1e-7, atol=0)
+            else:
+                result = DivisiblePad(k=5, mode="constant", value=2)(input_data).cpu()
             torch.testing.assert_allclose(result[:, :, 4:5], np.ones((3, 10, 1)) + 1, rtol=1e-7, atol=0)
 
 
diff --git a/tests/test_dynunet.py b/tests/test_dynunet.py
index 81ed239461..18fe146a40 100644
--- a/tests/test_dynunet.py
+++ b/tests/test_dynunet.py
@@ -26,14 +26,14 @@
 expected_shape: Sequence[Any]
 
 TEST_CASE_DYNUNET_2D = []
+out_channels = 2
+in_size = 64
+spatial_dims = 2
 for kernel_size in [(3, 3, 3, 1), ((3, 1), 1, (3, 3), (1, 1))]:
     for strides in [(1, 1, 1, 1), (2, 2, 2, 1)]:
+        expected_shape = (1, out_channels, *[in_size // strides[0]] * spatial_dims)
         for in_channels in [2, 3]:
             for res_block in [True, False]:
-                out_channels = 2
-                in_size = 64
-                spatial_dims = 2
-                expected_shape = (1, out_channels, *[in_size // strides[0]] * spatial_dims)
                 test_case = [
                     {
                         "spatial_dims": spatial_dims,
@@ -45,6 +45,7 @@
                         "norm_name": "batch",
                         "deep_supervision": False,
                         "res_block": res_block,
+                        "dropout": None,
                     },
                     (1, in_channels, in_size, in_size),
                     expected_shape,
@@ -52,11 +53,11 @@
                 TEST_CASE_DYNUNET_2D.append(test_case)
 
 TEST_CASE_DYNUNET_3D = []  # in 3d cases, also test anisotropic kernel/strides
+in_channels = 1
+in_size = 64
 for out_channels in [2, 3]:
+    expected_shape = (1, out_channels, 64, 32, 64)
     for res_block in [True, False]:
-        in_channels = 1
-        in_size = 64
-        expected_shape = (1, out_channels, 64, 32, 64)
         test_case = [
             {
                 "spatial_dims": 3,
@@ -68,6 +69,7 @@
                 "norm_name": ("INSTANCE", {"affine": True}),
                 "deep_supervision": False,
                 "res_block": res_block,
+                "dropout": ("alphadropout", {"p": 0.25}),
             },
             (1, in_channels, in_size, in_size, in_size),
             expected_shape,
diff --git a/tests/test_efficientnet.py b/tests/test_efficientnet.py
index 6befba108a..20c7123d7f 100644
--- a/tests/test_efficientnet.py
+++ b/tests/test_efficientnet.py
@@ -44,7 +44,7 @@
 
 
 def get_model_names():
-    return ["efficientnet-b{}".format(d) for d in range(8)]
+    return [f"efficientnet-b{d}" for d in range(8)]
 
 
 def get_expected_model_shape(model_name):
diff --git a/tests/test_ensure_type.py b/tests/test_ensure_type.py
index 8feb96ed37..64094b2360 100644
--- a/tests/test_ensure_type.py
+++ b/tests/test_ensure_type.py
@@ -25,9 +25,11 @@ def test_array_input(self):
             test_datas.append(test_datas[-1].cuda())
         for test_data in test_datas:
             for dtype in ("tensor", "NUMPY"):
-                result = EnsureType(data_type=dtype)(test_data)
+                result = EnsureType(dtype, dtype=np.float32 if dtype == "NUMPY" else None, device="cpu")(test_data)
+                if dtype == "NUMPY":
+                    self.assertTrue(result.dtype == np.float32)
                 self.assertTrue(isinstance(result, torch.Tensor if dtype == "tensor" else np.ndarray))
-                assert_allclose(result, test_data)
+                assert_allclose(result, test_data, type_test=False)
                 self.assertTupleEqual(result.shape, (2, 2))
 
     def test_single_input(self):
@@ -36,12 +38,12 @@ def test_single_input(self):
             test_datas.append(test_datas[-1].cuda())
         for test_data in test_datas:
             for dtype in ("tensor", "numpy"):
-                result = EnsureType(data_type=dtype)(test_data)
+                result = EnsureType(data_type=dtype, device="cpu")(test_data)
                 self.assertTrue(isinstance(result, torch.Tensor if dtype == "tensor" else np.ndarray))
                 if isinstance(test_data, bool):
                     self.assertFalse(result)
                 else:
-                    assert_allclose(result, test_data)
+                    assert_allclose(result, test_data, type_test=False)
                 self.assertEqual(result.ndim, 0)
 
     def test_string(self):
diff --git a/tests/test_ensure_typed.py b/tests/test_ensure_typed.py
index 96f482afc2..a78df6cb3f 100644
--- a/tests/test_ensure_typed.py
+++ b/tests/test_ensure_typed.py
@@ -25,9 +25,16 @@ def test_array_input(self):
             test_datas.append(test_datas[-1].cuda())
         for test_data in test_datas:
             for dtype in ("tensor", "NUMPY"):
-                result = EnsureTyped(keys="data", data_type=dtype)({"data": test_data})["data"]
+                result = EnsureTyped(
+                    keys="data",
+                    data_type=dtype,
+                    dtype=np.float32 if dtype == "NUMPY" else None,
+                    device="cpu",
+                )({"data": test_data})["data"]
+                if dtype == "NUMPY":
+                    self.assertTrue(result.dtype == np.float32)
                 self.assertTrue(isinstance(result, torch.Tensor if dtype == "tensor" else np.ndarray))
-                assert_allclose(result, test_data)
+                assert_allclose(result, test_data, type_test=False)
                 self.assertTupleEqual(result.shape, (2, 2))
 
     def test_single_input(self):
@@ -41,7 +48,7 @@ def test_single_input(self):
                 if isinstance(test_data, bool):
                     self.assertFalse(result)
                 else:
-                    assert_allclose(result, test_data)
+                    assert_allclose(result, test_data, type_test=False)
                 self.assertEqual(result.ndim, 0)
 
     def test_string(self):
@@ -75,7 +82,7 @@ def test_dict(self):
             "extra": None,
         }
         for dtype in ("tensor", "numpy"):
-            result = EnsureTyped(keys="data", data_type=dtype)({"data": test_data})["data"]
+            result = EnsureTyped(keys="data", data_type=dtype, device="cpu")({"data": test_data})["data"]
             self.assertTrue(isinstance(result, dict))
             self.assertTrue(isinstance(result["img"], torch.Tensor if dtype == "tensor" else np.ndarray))
             torch.testing.assert_allclose(result["img"], torch.as_tensor([1.0, 2.0]))
diff --git a/tests/test_fg_bg_to_indices.py b/tests/test_fg_bg_to_indices.py
index 98626c7028..0d35dd23f8 100644
--- a/tests/test_fg_bg_to_indices.py
+++ b/tests/test_fg_bg_to_indices.py
@@ -11,58 +11,70 @@
 
 import unittest
 
-import numpy as np
 from parameterized import parameterized
 
 from monai.transforms import FgBgToIndices
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [
-    {"image_threshold": 0.0, "output_shape": None},
-    np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]),
-    None,
-    np.array([1, 2, 3, 5, 6, 7]),
-    np.array([0, 4, 8]),
-]
+TESTS_CASES = []
+for p in TEST_NDARRAYS:
+    TESTS_CASES.append(
+        [
+            {"image_threshold": 0.0, "output_shape": None},
+            p([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]),
+            None,
+            p([1, 2, 3, 5, 6, 7]),
+            p([0, 4, 8]),
+        ]
+    )
 
-TEST_CASE_2 = [
-    {"image_threshold": 0.0, "output_shape": None},
-    np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]),
-    np.array([[[1, 1, 1], [1, 0, 1], [1, 1, 1]]]),
-    np.array([1, 2, 3, 5, 6, 7]),
-    np.array([0, 8]),
-]
+    TESTS_CASES.append(
+        [
+            {"image_threshold": 0.0, "output_shape": None},
+            p([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]),
+            p([[[1, 1, 1], [1, 0, 1], [1, 1, 1]]]),
+            p([1, 2, 3, 5, 6, 7]),
+            p([0, 8]),
+        ]
+    )
 
-TEST_CASE_3 = [
-    {"image_threshold": 1.0, "output_shape": None},
-    np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]),
-    np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]),
-    np.array([1, 2, 3, 5, 6, 7]),
-    np.array([0, 8]),
-]
+    TESTS_CASES.append(
+        [
+            {"image_threshold": 1.0, "output_shape": None},
+            p([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]),
+            p([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]),
+            p([1, 2, 3, 5, 6, 7]),
+            p([0, 8]),
+        ]
+    )
 
-TEST_CASE_4 = [
-    {"image_threshold": 1.0, "output_shape": None},
-    np.array([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]),
-    np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]),
-    np.array([1, 2, 3, 5, 6, 7]),
-    np.array([0, 8]),
-]
+    TESTS_CASES.append(
+        [
+            {"image_threshold": 1.0, "output_shape": None},
+            p([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]),
+            p([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]),
+            p([1, 2, 3, 5, 6, 7]),
+            p([0, 8]),
+        ]
+    )
 
-TEST_CASE_5 = [
-    {"image_threshold": 1.0, "output_shape": [3, 3]},
-    np.array([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]),
-    np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]),
-    np.array([[0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1]]),
-    np.array([[0, 0], [2, 2]]),
-]
+    TESTS_CASES.append(
+        [
+            {"image_threshold": 1.0, "output_shape": [3, 3]},
+            p([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]),
+            p([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]),
+            p([[0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1]]),
+            p([[0, 0], [2, 2]]),
+        ]
+    )
 
 
 class TestFgBgToIndices(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5])
+    @parameterized.expand(TESTS_CASES)
     def test_type_shape(self, input_data, label, image, expected_fg, expected_bg):
         fg_indices, bg_indices = FgBgToIndices(**input_data)(label, image)
-        np.testing.assert_allclose(fg_indices, expected_fg)
-        np.testing.assert_allclose(bg_indices, expected_bg)
+        assert_allclose(fg_indices, expected_fg)
+        assert_allclose(bg_indices, expected_bg)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_fg_bg_to_indicesd.py b/tests/test_fg_bg_to_indicesd.py
index ce6ca30f1b..4691526d94 100644
--- a/tests/test_fg_bg_to_indicesd.py
+++ b/tests/test_fg_bg_to_indicesd.py
@@ -11,53 +11,66 @@
 
 import unittest
 
-import numpy as np
 from parameterized import parameterized
 
 from monai.transforms import FgBgToIndicesd
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [
-    {"keys": "label", "image_key": None, "image_threshold": 0.0, "output_shape": None},
-    {"label": np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]])},
-    np.array([1, 2, 3, 5, 6, 7]),
-    np.array([0, 4, 8]),
-]
+TEST_CASES = []
+for p in TEST_NDARRAYS:
 
-TEST_CASE_2 = [
-    {"keys": "label", "image_key": "image", "image_threshold": 0.0, "output_shape": None},
-    {"label": np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), "image": np.array([[[1, 1, 1], [1, 0, 1], [1, 1, 1]]])},
-    np.array([1, 2, 3, 5, 6, 7]),
-    np.array([0, 8]),
-]
+    TEST_CASES.append(
+        [
+            {"keys": "label", "image_key": None, "image_threshold": 0.0, "output_shape": None},
+            {"label": p([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]])},
+            p([1, 2, 3, 5, 6, 7]),
+            p([0, 4, 8]),
+        ]
+    )
 
-TEST_CASE_3 = [
-    {"keys": "label", "image_key": "image", "image_threshold": 1.0, "output_shape": None},
-    {"label": np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), "image": np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])},
-    np.array([1, 2, 3, 5, 6, 7]),
-    np.array([0, 8]),
-]
+    TEST_CASES.append(
+        [
+            {"keys": "label", "image_key": "image", "image_threshold": 0.0, "output_shape": None},
+            {"label": p([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), "image": p([[[1, 1, 1], [1, 0, 1], [1, 1, 1]]])},
+            p([1, 2, 3, 5, 6, 7]),
+            p([0, 8]),
+        ]
+    )
 
-TEST_CASE_4 = [
-    {"keys": "label", "image_key": "image", "image_threshold": 1.0, "output_shape": None},
-    {"label": np.array([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]), "image": np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])},
-    np.array([1, 2, 3, 5, 6, 7]),
-    np.array([0, 8]),
-]
+    TEST_CASES.append(
+        [
+            {"keys": "label", "image_key": "image", "image_threshold": 1.0, "output_shape": None},
+            {"label": p([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), "image": p([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])},
+            p([1, 2, 3, 5, 6, 7]),
+            p([0, 8]),
+        ]
+    )
 
-TEST_CASE_5 = [
-    {"keys": "label", "image_key": "image", "image_threshold": 1.0, "output_shape": [3, 3]},
-    {"label": np.array([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]), "image": np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])},
-    np.array([[0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1]]),
-    np.array([[0, 0], [2, 2]]),
-]
+    TEST_CASES.append(
+        [
+            {"keys": "label", "image_key": "image", "image_threshold": 1.0, "output_shape": None},
+            {"label": p([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]), "image": p([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])},
+            p([1, 2, 3, 5, 6, 7]),
+            p([0, 8]),
+        ]
+    )
+
+    TEST_CASES.append(
+        [
+            {"keys": "label", "image_key": "image", "image_threshold": 1.0, "output_shape": [3, 3]},
+            {"label": p([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]), "image": p([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])},
+            p([[0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1]]),
+            p([[0, 0], [2, 2]]),
+        ]
+    )
 
 
 class TestFgBgToIndicesd(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5])
+    @parameterized.expand(TEST_CASES)
     def test_type_shape(self, input_data, data, expected_fg, expected_bg):
         result = FgBgToIndicesd(**input_data)(data)
-        np.testing.assert_allclose(result["label_fg_indices"], expected_fg)
-        np.testing.assert_allclose(result["label_bg_indices"], expected_bg)
+        assert_allclose(result["label_fg_indices"], expected_fg)
+        assert_allclose(result["label_bg_indices"], expected_bg)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_flip.py b/tests/test_flip.py
index 404a3def7d..8547f8aeb4 100644
--- a/tests/test_flip.py
+++ b/tests/test_flip.py
@@ -34,12 +34,10 @@ def test_correct_results(self, _, spatial_axis):
         for p in TEST_NDARRAYS:
             im = p(self.imt[0])
             flip = Flip(spatial_axis=spatial_axis)
-            expected = []
-            for channel in self.imt[0]:
-                expected.append(np.flip(channel, spatial_axis))
+            expected = [np.flip(channel, spatial_axis) for channel in self.imt[0]]
             expected = np.stack(expected)
             result = flip(im)
-            assert_allclose(expected, result)
+            assert_allclose(result, p(expected))
 
 
 if __name__ == "__main__":
diff --git a/tests/test_flipd.py b/tests/test_flipd.py
index 1676723800..2fa783f8ad 100644
--- a/tests/test_flipd.py
+++ b/tests/test_flipd.py
@@ -33,12 +33,10 @@ def test_invalid_cases(self, _, spatial_axis, raises):
     def test_correct_results(self, _, spatial_axis):
         for p in TEST_NDARRAYS:
             flip = Flipd(keys="img", spatial_axis=spatial_axis)
-            expected = []
-            for channel in self.imt[0]:
-                expected.append(np.flip(channel, spatial_axis))
+            expected = [np.flip(channel, spatial_axis) for channel in self.imt[0]]
             expected = np.stack(expected)
             result = flip({"img": p(self.imt[0])})["img"]
-            assert_allclose(expected, result)
+            assert_allclose(result, p(expected))
 
 
 if __name__ == "__main__":
diff --git a/tests/test_gaussian_sharpen.py b/tests/test_gaussian_sharpen.py
index 9d078e65e5..9130e33656 100644
--- a/tests/test_gaussian_sharpen.py
+++ b/tests/test_gaussian_sharpen.py
@@ -11,50 +11,79 @@
 
 import unittest
 
-import numpy as np
 from parameterized import parameterized
 
 from monai.transforms import GaussianSharpen
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [
-    {},
-    np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
-    np.array(
+TESTS = []
+
+for p in TEST_NDARRAYS:
+    TESTS.append(
         [
-            [[4.1081963, 3.4950666, 4.1081963], [3.7239995, 2.8491793, 3.7239995], [4.569839, 3.9529324, 4.569839]],
-            [[10.616725, 9.081067, 10.616725], [9.309998, 7.12295, 9.309998], [11.078365, 9.538931, 11.078365]],
+            {},
+            p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
+            p(
+                [
+                    [
+                        [4.1081963, 3.4950666, 4.1081963],
+                        [3.7239995, 2.8491793, 3.7239995],
+                        [4.569839, 3.9529324, 4.569839],
+                    ],
+                    [[10.616725, 9.081067, 10.616725], [9.309998, 7.12295, 9.309998], [11.078365, 9.538931, 11.078365]],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_2 = [
-    {"sigma1": 1.0, "sigma2": 0.75, "alpha": 20},
-    np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
-    np.array(
+    TESTS.append(
         [
-            [[4.513644, 4.869134, 4.513644], [8.467242, 9.4004135, 8.467242], [10.416813, 12.0653515, 10.416813]],
-            [[15.711488, 17.569994, 15.711488], [21.16811, 23.501041, 21.16811], [21.614658, 24.766209, 21.614658]],
+            {"sigma1": 1.0, "sigma2": 0.75, "alpha": 20},
+            p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
+            p(
+                [
+                    [
+                        [4.513644, 4.869134, 4.513644],
+                        [8.467242, 9.4004135, 8.467242],
+                        [10.416813, 12.0653515, 10.416813],
+                    ],
+                    [
+                        [15.711488, 17.569994, 15.711488],
+                        [21.16811, 23.501041, 21.16811],
+                        [21.614658, 24.766209, 21.614658],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_3 = [
-    {"sigma1": (0.5, 1.0), "sigma2": (0.5, 0.75), "alpha": 20},
-    np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
-    np.array(
+    TESTS.append(
         [
-            [[3.3324685, 3.335536, 3.3324673], [7.7666636, 8.16056, 7.7666636], [12.662973, 14.317837, 12.6629715]],
-            [[15.329051, 16.57557, 15.329051], [19.41665, 20.40139, 19.416655], [24.659554, 27.557873, 24.659554]],
+            {"sigma1": (0.5, 1.0), "sigma2": (0.5, 0.75), "alpha": 20},
+            p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
+            p(
+                [
+                    [
+                        [3.3324685, 3.335536, 3.3324673],
+                        [7.7666636, 8.16056, 7.7666636],
+                        [12.662973, 14.317837, 12.6629715],
+                    ],
+                    [
+                        [15.329051, 16.57557, 15.329051],
+                        [19.41665, 20.40139, 19.416655],
+                        [24.659554, 27.557873, 24.659554],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
 
 class TestGaussianSharpen(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
+    @parameterized.expand(TESTS)
     def test_value(self, argments, image, expected_data):
         result = GaussianSharpen(**argments)(image)
-        np.testing.assert_allclose(result, expected_data, rtol=1e-4)
+        assert_allclose(result, expected_data, atol=0, rtol=1e-4, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_gaussian_sharpend.py b/tests/test_gaussian_sharpend.py
index c795b11762..4b84eb9c12 100644
--- a/tests/test_gaussian_sharpend.py
+++ b/tests/test_gaussian_sharpend.py
@@ -15,46 +15,75 @@
 from parameterized import parameterized
 
 from monai.transforms import GaussianSharpend
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [
-    {"keys": "img"},
-    {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])},
-    np.array(
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
         [
-            [[4.1081963, 3.4950666, 4.1081963], [3.7239995, 2.8491793, 3.7239995], [4.569839, 3.9529324, 4.569839]],
-            [[10.616725, 9.081067, 10.616725], [9.309998, 7.12295, 9.309998], [11.078365, 9.538931, 11.078365]],
+            {"keys": "img"},
+            {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))},
+            np.array(
+                [
+                    [
+                        [4.1081963, 3.4950666, 4.1081963],
+                        [3.7239995, 2.8491793, 3.7239995],
+                        [4.569839, 3.9529324, 4.569839],
+                    ],
+                    [[10.616725, 9.081067, 10.616725], [9.309998, 7.12295, 9.309998], [11.078365, 9.538931, 11.078365]],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_2 = [
-    {"keys": "img", "sigma1": 1.0, "sigma2": 0.75, "alpha": 20},
-    {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])},
-    np.array(
+    TESTS.append(
         [
-            [[4.513644, 4.869134, 4.513644], [8.467242, 9.4004135, 8.467242], [10.416813, 12.0653515, 10.416813]],
-            [[15.711488, 17.569994, 15.711488], [21.16811, 23.501041, 21.16811], [21.614658, 24.766209, 21.614658]],
+            {"keys": "img", "sigma1": 1.0, "sigma2": 0.75, "alpha": 20},
+            {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))},
+            np.array(
+                [
+                    [
+                        [4.513644, 4.869134, 4.513644],
+                        [8.467242, 9.4004135, 8.467242],
+                        [10.416813, 12.0653515, 10.416813],
+                    ],
+                    [
+                        [15.711488, 17.569994, 15.711488],
+                        [21.16811, 23.501041, 21.16811],
+                        [21.614658, 24.766209, 21.614658],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_3 = [
-    {"keys": "img", "sigma1": (0.5, 1.0), "sigma2": (0.5, 0.75), "alpha": 20},
-    {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])},
-    np.array(
+    TESTS.append(
         [
-            [[3.3324685, 3.335536, 3.3324673], [7.7666636, 8.16056, 7.7666636], [12.662973, 14.317837, 12.6629715]],
-            [[15.329051, 16.57557, 15.329051], [19.41665, 20.40139, 19.416655], [24.659554, 27.557873, 24.659554]],
+            {"keys": "img", "sigma1": (0.5, 1.0), "sigma2": (0.5, 0.75), "alpha": 20},
+            {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))},
+            np.array(
+                [
+                    [
+                        [3.3324685, 3.335536, 3.3324673],
+                        [7.7666636, 8.16056, 7.7666636],
+                        [12.662973, 14.317837, 12.6629715],
+                    ],
+                    [
+                        [15.329051, 16.57557, 15.329051],
+                        [19.41665, 20.40139, 19.416655],
+                        [24.659554, 27.557873, 24.659554],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
 
 class TestGaussianSharpend(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
+    @parameterized.expand(TESTS)
     def test_value(self, argments, image, expected_data):
         result = GaussianSharpend(**argments)(image)
-        np.testing.assert_allclose(result["img"], expected_data, rtol=1e-4)
+        assert_allclose(result["img"], expected_data, rtol=1e-4, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_gaussian_smooth.py b/tests/test_gaussian_smooth.py
index e51977fbee..24ecfb88e8 100644
--- a/tests/test_gaussian_smooth.py
+++ b/tests/test_gaussian_smooth.py
@@ -11,54 +11,83 @@
 
 import unittest
 
-import numpy as np
 from parameterized import parameterized
 
 from monai.transforms import GaussianSmooth
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [
-    {"sigma": 1.5},
-    np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
-    np.array(
+TESTS = []
+
+for p in TEST_NDARRAYS:
+    TESTS.append(
         [
-            [
-                [0.59167546, 0.69312394, 0.59167546],
-                [0.7956997, 0.93213004, 0.7956997],
-                [0.7668002, 0.8982755, 0.7668002],
-            ],
-            [[1.6105323, 1.8866735, 1.6105323], [1.9892492, 2.3303251, 1.9892492], [1.7856569, 2.091825, 1.7856569]],
+            {"sigma": 1.5},
+            p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
+            p(
+                [
+                    [
+                        [0.59167546, 0.69312394, 0.59167546],
+                        [0.7956997, 0.93213004, 0.7956997],
+                        [0.7668002, 0.8982755, 0.7668002],
+                    ],
+                    [
+                        [1.6105323, 1.8866735, 1.6105323],
+                        [1.9892492, 2.3303251, 1.9892492],
+                        [1.7856569, 2.091825, 1.7856569],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_2 = [
-    {"sigma": 0.5},
-    np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
-    np.array(
+    TESTS.append(
         [
-            [[0.8424794, 0.99864554, 0.8424794], [1.678146, 1.9892154, 1.678146], [1.9889624, 2.3576462, 1.9889624]],
-            [[2.966061, 3.5158648, 2.966061], [4.1953645, 4.973038, 4.1953645], [4.112544, 4.8748655, 4.1125436]],
+            {"sigma": 0.5},
+            p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
+            p(
+                [
+                    [
+                        [0.8424794, 0.99864554, 0.8424794],
+                        [1.678146, 1.9892154, 1.678146],
+                        [1.9889624, 2.3576462, 1.9889624],
+                    ],
+                    [
+                        [2.966061, 3.5158648, 2.966061],
+                        [4.1953645, 4.973038, 4.1953645],
+                        [4.112544, 4.8748655, 4.1125436],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_3 = [
-    {"sigma": [1.5, 0.5]},
-    np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
-    np.array(
+    TESTS.append(
         [
-            [[0.8542037, 1.0125432, 0.8542037], [1.1487541, 1.3616928, 1.1487541], [1.1070318, 1.3122368, 1.1070318]],
-            [[2.3251305, 2.756128, 2.3251305], [2.8718853, 3.4042323, 2.8718853], [2.5779586, 3.0558217, 2.5779586]],
+            {"sigma": [1.5, 0.5]},
+            p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
+            p(
+                [
+                    [
+                        [0.8542037, 1.0125432, 0.8542037],
+                        [1.1487541, 1.3616928, 1.1487541],
+                        [1.1070318, 1.3122368, 1.1070318],
+                    ],
+                    [
+                        [2.3251305, 2.756128, 2.3251305],
+                        [2.8718853, 3.4042323, 2.8718853],
+                        [2.5779586, 3.0558217, 2.5779586],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
 
 class TestGaussianSmooth(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
+    @parameterized.expand(TESTS)
     def test_value(self, argments, image, expected_data):
         result = GaussianSmooth(**argments)(image)
-        np.testing.assert_allclose(result, expected_data, rtol=1e-4)
+        assert_allclose(result, expected_data, atol=0, rtol=1e-4, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_gaussian_smoothd.py b/tests/test_gaussian_smoothd.py
index 3d7eb6195e..ae358dd59a 100644
--- a/tests/test_gaussian_smoothd.py
+++ b/tests/test_gaussian_smoothd.py
@@ -15,50 +15,79 @@
 from parameterized import parameterized
 
 from monai.transforms import GaussianSmoothd
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [
-    {"keys": "img", "sigma": 1.5},
-    {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])},
-    np.array(
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
         [
-            [
-                [0.59167546, 0.69312394, 0.59167546],
-                [0.7956997, 0.93213004, 0.7956997],
-                [0.7668002, 0.8982755, 0.7668002],
-            ],
-            [[1.6105323, 1.8866735, 1.6105323], [1.9892492, 2.3303251, 1.9892492], [1.7856569, 2.091825, 1.7856569]],
+            {"keys": "img", "sigma": 1.5},
+            {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))},
+            np.array(
+                [
+                    [
+                        [0.59167546, 0.69312394, 0.59167546],
+                        [0.7956997, 0.93213004, 0.7956997],
+                        [0.7668002, 0.8982755, 0.7668002],
+                    ],
+                    [
+                        [1.6105323, 1.8866735, 1.6105323],
+                        [1.9892492, 2.3303251, 1.9892492],
+                        [1.7856569, 2.091825, 1.7856569],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_2 = [
-    {"keys": "img", "sigma": 0.5},
-    {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])},
-    np.array(
+    TESTS.append(
         [
-            [[0.8424794, 0.99864554, 0.8424794], [1.678146, 1.9892154, 1.678146], [1.9889624, 2.3576462, 1.9889624]],
-            [[2.966061, 3.5158648, 2.966061], [4.1953645, 4.973038, 4.1953645], [4.112544, 4.8748655, 4.1125436]],
+            {"keys": "img", "sigma": 0.5},
+            {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))},
+            np.array(
+                [
+                    [
+                        [0.8424794, 0.99864554, 0.8424794],
+                        [1.678146, 1.9892154, 1.678146],
+                        [1.9889624, 2.3576462, 1.9889624],
+                    ],
+                    [
+                        [2.966061, 3.5158648, 2.966061],
+                        [4.1953645, 4.973038, 4.1953645],
+                        [4.112544, 4.8748655, 4.1125436],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_3 = [
-    {"keys": "img", "sigma": [1.5, 0.5]},
-    {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])},
-    np.array(
+    TESTS.append(
         [
-            [[0.8542037, 1.0125432, 0.8542037], [1.1487541, 1.3616928, 1.1487541], [1.1070318, 1.3122368, 1.1070318]],
-            [[2.3251305, 2.756128, 2.3251305], [2.8718853, 3.4042323, 2.8718853], [2.5779586, 3.0558217, 2.5779586]],
+            {"keys": "img", "sigma": [1.5, 0.5]},
+            {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))},
+            np.array(
+                [
+                    [
+                        [0.8542037, 1.0125432, 0.8542037],
+                        [1.1487541, 1.3616928, 1.1487541],
+                        [1.1070318, 1.3122368, 1.1070318],
+                    ],
+                    [
+                        [2.3251305, 2.756128, 2.3251305],
+                        [2.8718853, 3.4042323, 2.8718853],
+                        [2.5779586, 3.0558217, 2.5779586],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
 
 class TestGaussianSmoothd(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
+    @parameterized.expand(TESTS)
     def test_value(self, argments, image, expected_data):
         result = GaussianSmoothd(**argments)(image)
-        np.testing.assert_allclose(result["img"], expected_data, rtol=1e-4)
+        assert_allclose(result["img"], expected_data, rtol=1e-4, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_generalized_wasserstein_dice_loss.py b/tests/test_generalized_wasserstein_dice_loss.py
index 295a4a6d70..5ad946d20d 100644
--- a/tests/test_generalized_wasserstein_dice_loss.py
+++ b/tests/test_generalized_wasserstein_dice_loss.py
@@ -159,7 +159,7 @@ def test_convergence(self):
         # define a model with one layer
         class OnelayerNet(nn.Module):
             def __init__(self):
-                super(OnelayerNet, self).__init__()
+                super().__init__()
                 self.layer = nn.Linear(num_voxels, num_voxels * num_classes)
 
             def forward(self, x):
diff --git a/tests/test_generate_label_classes_crop_centers.py b/tests/test_generate_label_classes_crop_centers.py
index 38f2a3e0d1..0e40750276 100644
--- a/tests/test_generate_label_classes_crop_centers.py
+++ b/tests/test_generate_label_classes_crop_centers.py
@@ -10,11 +10,13 @@
 # limitations under the License.
 
 import unittest
+from copy import deepcopy
 
-import numpy as np
 from parameterized import parameterized
 
 from monai.transforms import generate_label_classes_crop_centers
+from monai.utils.misc import set_determinism
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
 TEST_CASE_1 = [
     {
@@ -23,7 +25,6 @@
         "ratios": [1, 2],
         "label_spatial_shape": [3, 3, 3],
         "indices": [[3, 12, 21], [1, 9, 18]],
-        "rand_state": np.random.RandomState(),
     },
     list,
     2,
@@ -37,7 +38,6 @@
         "ratios": None,
         "label_spatial_shape": [3, 3, 3],
         "indices": [[3, 12, 21], [1, 9, 18]],
-        "rand_state": np.random.RandomState(),
     },
     list,
     1,
@@ -48,10 +48,21 @@
 class TestGenerateLabelClassesCropCenters(unittest.TestCase):
     @parameterized.expand([TEST_CASE_1, TEST_CASE_2])
     def test_type_shape(self, input_data, expected_type, expected_count, expected_shape):
-        result = generate_label_classes_crop_centers(**input_data)
-        self.assertIsInstance(result, expected_type)
-        self.assertEqual(len(result), expected_count)
-        self.assertEqual(len(result[0]), expected_shape)
+        results = []
+        for p in TEST_NDARRAYS + (None,):
+            input_data = deepcopy(input_data)
+            if p is not None:
+                input_data["indices"] = p(input_data["indices"])
+            set_determinism(0)
+            result = generate_label_classes_crop_centers(**input_data)
+            self.assertIsInstance(result, expected_type)
+            self.assertEqual(len(result), expected_count)
+            self.assertEqual(len(result[0]), expected_shape)
+            # check for consistency between numpy, torch and torch.cuda
+            results.append(result)
+            if len(results) > 1:
+                for x, y in zip(result[0], result[-1]):
+                    assert_allclose(x, y, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_generate_pos_neg_label_crop_centers.py b/tests/test_generate_pos_neg_label_crop_centers.py
index 40181aa9ea..b8f2840757 100644
--- a/tests/test_generate_pos_neg_label_crop_centers.py
+++ b/tests/test_generate_pos_neg_label_crop_centers.py
@@ -10,35 +10,52 @@
 # limitations under the License.
 
 import unittest
+from copy import deepcopy
 
-import numpy as np
 from parameterized import parameterized
 
 from monai.transforms import generate_pos_neg_label_crop_centers
-
-TEST_CASE_1 = [
-    {
-        "spatial_size": [2, 2, 2],
-        "num_samples": 2,
-        "pos_ratio": 1.0,
-        "label_spatial_shape": [3, 3, 3],
-        "fg_indices": [1, 9, 18],
-        "bg_indices": [3, 12, 21],
-        "rand_state": np.random.RandomState(),
-    },
-    list,
-    2,
-    3,
-]
+from monai.utils.misc import set_determinism
+from tests.utils import TEST_NDARRAYS, assert_allclose
+
+TESTS = []
+TESTS.append(
+    [
+        {
+            "spatial_size": [2, 2, 2],
+            "num_samples": 2,
+            "pos_ratio": 1.0,
+            "label_spatial_shape": [3, 3, 3],
+            "fg_indices": [1, 9, 18],
+            "bg_indices": [3, 12, 21],
+        },
+        list,
+        2,
+        3,
+    ]
+)
 
 
 class TestGeneratePosNegLabelCropCenters(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1])
+    @parameterized.expand(TESTS)
     def test_type_shape(self, input_data, expected_type, expected_count, expected_shape):
-        result = generate_pos_neg_label_crop_centers(**input_data)
-        self.assertIsInstance(result, expected_type)
-        self.assertEqual(len(result), expected_count)
-        self.assertEqual(len(result[0]), expected_shape)
+        results = []
+        for p in TEST_NDARRAYS + (None,):
+            input_data = deepcopy(input_data)
+            if p is not None:
+                for k in ["fg_indices", "bg_indices"]:
+                    input_data[k] = p(input_data[k])
+            set_determinism(0)
+            result = generate_pos_neg_label_crop_centers(**input_data)
+            self.assertIsInstance(result, expected_type)
+            self.assertEqual(len(result), expected_count)
+            self.assertEqual(len(result[0]), expected_shape)
+            # check for consistency between numpy, torch and torch.cuda
+            results.append(result)
+            if len(results) > 1:
+                # compare every crop center
+                for x, y in zip(results[0], results[-1]):
+                    assert_allclose(x, y, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_generate_spatial_bounding_box.py b/tests/test_generate_spatial_bounding_box.py
index 32a45d8d1c..d73b9fafcc 100644
--- a/tests/test_generate_spatial_bounding_box.py
+++ b/tests/test_generate_spatial_bounding_box.py
@@ -15,60 +15,79 @@
 from parameterized import parameterized
 
 from monai.transforms import generate_spatial_bounding_box
+from tests.utils import TEST_NDARRAYS
 
-TEST_CASE_1 = [
-    {
-        "img": np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]),
-        "select_fn": lambda x: x > 0,
-        "channel_indices": None,
-        "margin": 0,
-    },
-    ([1, 1], [4, 4]),
-]
-
-TEST_CASE_2 = [
-    {
-        "img": np.array([[[0, 0, 0, 0, 0], [0, 1, 1, 1, 0], [0, 1, 3, 1, 0], [0, 1, 1, 1, 0], [0, 0, 0, 0, 0]]]),
-        "select_fn": lambda x: x > 1,
-        "channel_indices": None,
-        "margin": 0,
-    },
-    ([2, 2], [3, 3]),
-]
-
-TEST_CASE_3 = [
-    {
-        "img": np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]]),
-        "select_fn": lambda x: x > 0,
-        "channel_indices": 0,
-        "margin": 0,
-    },
-    ([1, 1], [4, 4]),
-]
-
-TEST_CASE_4 = [
-    {
-        "img": np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]),
-        "select_fn": lambda x: x > 0,
-        "channel_indices": None,
-        "margin": 1,
-    },
-    ([0, 0], [4, 5]),
-]
-
-TEST_CASE_5 = [
-    {
-        "img": np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]]),
-        "select_fn": lambda x: x > 0,
-        "channel_indices": None,
-        "margin": [2, 1],
-    },
-    ([0, 0], [5, 5]),
-]
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
+        [
+            {
+                "img": p(
+                    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]])
+                ),
+                "select_fn": lambda x: x > 0,
+                "channel_indices": None,
+                "margin": 0,
+            },
+            ([1, 1], [4, 4]),
+        ]
+    )
+    TESTS.append(
+        [
+            {
+                "img": p(
+                    np.array([[[0, 0, 0, 0, 0], [0, 1, 1, 1, 0], [0, 1, 3, 1, 0], [0, 1, 1, 1, 0], [0, 0, 0, 0, 0]]])
+                ),
+                "select_fn": lambda x: x > 1,
+                "channel_indices": None,
+                "margin": 0,
+            },
+            ([2, 2], [3, 3]),
+        ]
+    )
+    TESTS.append(
+        [
+            {
+                "img": p(
+                    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 1, 2, 1, 0], [0, 0, 0, 0, 0]]])
+                ),
+                "select_fn": lambda x: x > 0,
+                "channel_indices": 0,
+                "margin": 0,
+            },
+            ([1, 1], [4, 4]),
+        ]
+    )
+    TESTS.append(
+        [
+            {
+                "img": p(
+                    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]])
+                ),
+                "select_fn": lambda x: x > 0,
+                "channel_indices": None,
+                "margin": 1,
+            },
+            ([0, 0], [4, 5]),
+        ]
+    )
+    TESTS.append(
+        [
+            {
+                "img": p(
+                    np.array([[[0, 0, 0, 0, 0], [0, 1, 2, 1, 0], [0, 2, 3, 2, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0]]])
+                ),
+                "select_fn": lambda x: x > 0,
+                "channel_indices": None,
+                "margin": [2, 1],
+            },
+            ([0, 0], [5, 5]),
+        ]
+    )
 
 
 class TestGenerateSpatialBoundingBox(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5])
+    @parameterized.expand(TESTS)
     def test_value(self, input_data, expected_box):
         result = generate_spatial_bounding_box(**input_data)
         self.assertTupleEqual(result, expected_box)
diff --git a/tests/test_get_equivalent_dtype.py b/tests/test_get_equivalent_dtype.py
index 04ba5ae5fb..de2379b15b 100644
--- a/tests/test_get_equivalent_dtype.py
+++ b/tests/test_get_equivalent_dtype.py
@@ -32,6 +32,14 @@ def test_get_equivalent_dtype(self, im, input_dtype):
         out_dtype = get_equivalent_dtype(input_dtype, type(im))
         self.assertEqual(out_dtype, im.dtype)
 
+    def test_native_type(self):
+        """the get_equivalent_dtype currently doesn't change the build-in type"""
+        n_type = [float, int, bool]
+        for n in n_type:
+            for im_dtype in DTYPES:
+                out_dtype = get_equivalent_dtype(n, type(im_dtype))
+                self.assertEqual(out_dtype, n)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_gibbs_noise.py b/tests/test_gibbs_noise.py
index 264e2e630a..2c5e117eaf 100644
--- a/tests/test_gibbs_noise.py
+++ b/tests/test_gibbs_noise.py
@@ -19,17 +19,17 @@
 from monai.data.synthetic import create_test_image_2d, create_test_image_3d
 from monai.transforms import GibbsNoise
 from monai.utils.misc import set_determinism
-from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule
+from monai.utils.module import optional_import
+from tests.utils import TEST_NDARRAYS
+
+_, has_torch_fft = optional_import("torch.fft", name="fftshift")
 
 TEST_CASES = []
 for shape in ((128, 64), (64, 48, 80)):
-    for as_tensor_output in (True, False):
-        for as_tensor_input in (True, False):
-            TEST_CASES.append((shape, as_tensor_output, as_tensor_input))
+    for input_type in TEST_NDARRAYS if has_torch_fft else [np.array]:
+        TEST_CASES.append((shape, input_type))
 
 
-@SkipIfBeforePyTorchVersion((1, 8))
-@SkipIfNoModule("torch.fft")
 class TestGibbsNoise(unittest.TestCase):
     def setUp(self):
         set_determinism(0)
@@ -39,36 +39,39 @@ def tearDown(self):
         set_determinism(None)
 
     @staticmethod
-    def get_data(im_shape, as_tensor_input):
+    def get_data(im_shape, input_type):
         create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d
         im = create_test_image(*im_shape, num_objs=4, rad_max=20, noise_max=0.0, num_seg_classes=5)[0][None]
-        return torch.Tensor(im) if as_tensor_input else im
+        return input_type(im)
 
     @parameterized.expand(TEST_CASES)
-    def test_same_result(self, im_shape, as_tensor_output, as_tensor_input):
-        im = self.get_data(im_shape, as_tensor_input)
+    def test_same_result(self, im_shape, input_type):
+        im = self.get_data(im_shape, input_type)
         alpha = 0.8
-        t = GibbsNoise(alpha, as_tensor_output)
+        t = GibbsNoise(alpha)
         out1 = t(deepcopy(im))
         out2 = t(deepcopy(im))
-        np.testing.assert_allclose(out1, out2)
-        self.assertIsInstance(out1, torch.Tensor if as_tensor_output else np.ndarray)
+        self.assertEqual(type(out1), type(im))
+        if isinstance(out1, torch.Tensor):
+            self.assertEqual(out1.device, im.device)
+        torch.testing.assert_allclose(out1, out2, rtol=1e-7, atol=0)
+        self.assertIsInstance(out1, type(im))
 
     @parameterized.expand(TEST_CASES)
-    def test_identity(self, im_shape, _, as_tensor_input):
-        im = self.get_data(im_shape, as_tensor_input)
+    def test_identity(self, im_shape, input_type):
+        im = self.get_data(im_shape, input_type)
         alpha = 0.0
         t = GibbsNoise(alpha)
         out = t(deepcopy(im))
-        np.testing.assert_allclose(im, out, atol=1e-2)
+        torch.testing.assert_allclose(im, out, atol=1e-2, rtol=1e-7)
 
     @parameterized.expand(TEST_CASES)
-    def test_alpha_1(self, im_shape, _, as_tensor_input):
-        im = self.get_data(im_shape, as_tensor_input)
+    def test_alpha_1(self, im_shape, input_type):
+        im = self.get_data(im_shape, input_type)
         alpha = 1.0
         t = GibbsNoise(alpha)
         out = t(deepcopy(im))
-        np.testing.assert_allclose(0 * im, out)
+        torch.testing.assert_allclose(0 * im, out, rtol=1e-7, atol=0)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_gibbs_noised.py b/tests/test_gibbs_noised.py
index 558556489a..f02052818f 100644
--- a/tests/test_gibbs_noised.py
+++ b/tests/test_gibbs_noised.py
@@ -19,19 +19,18 @@
 from monai.data.synthetic import create_test_image_2d, create_test_image_3d
 from monai.transforms import GibbsNoised
 from monai.utils.misc import set_determinism
-from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule
+from monai.utils.module import optional_import
+from tests.utils import TEST_NDARRAYS
+
+_, has_torch_fft = optional_import("torch.fft", name="fftshift")
 
 TEST_CASES = []
 for shape in ((128, 64), (64, 48, 80)):
-    for as_tensor_output in (True, False):
-        for as_tensor_input in (True, False):
-            TEST_CASES.append((shape, as_tensor_output, as_tensor_input))
-
+    for input_type in TEST_NDARRAYS if has_torch_fft else [np.array]:
+        TEST_CASES.append((shape, input_type))
 KEYS = ["im", "label"]
 
 
-@SkipIfBeforePyTorchVersion((1, 8))
-@SkipIfNoModule("torch.fft")
 class TestGibbsNoised(unittest.TestCase):
     def setUp(self):
         set_determinism(0)
@@ -41,49 +40,56 @@ def tearDown(self):
         set_determinism(None)
 
     @staticmethod
-    def get_data(im_shape, as_tensor_input):
+    def get_data(im_shape, input_type):
         create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d
         ims = create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5)
-        ims = [torch.Tensor(im) for im in ims] if as_tensor_input else ims
-        return dict(zip(KEYS, ims))
+        return {k: input_type(deepcopy(v)) for k, v in zip(KEYS, ims)}
 
     @parameterized.expand(TEST_CASES)
-    def test_same_result(self, im_shape, as_tensor_output, as_tensor_input):
-        data = self.get_data(im_shape, as_tensor_input)
+    def test_same_result(self, im_shape, input_type):
+        data = self.get_data(im_shape, input_type)
         alpha = 0.8
-        t = GibbsNoised(KEYS, alpha, as_tensor_output)
+        t = GibbsNoised(KEYS, alpha)
         out1 = t(deepcopy(data))
         out2 = t(deepcopy(data))
         for k in KEYS:
-            np.testing.assert_allclose(out1[k], out2[k])
-            self.assertIsInstance(out1[k], torch.Tensor if as_tensor_output else np.ndarray)
+            torch.testing.assert_allclose(out1[k], out2[k], rtol=1e-7, atol=0)
+            self.assertIsInstance(out1[k], type(data[k]))
 
     @parameterized.expand(TEST_CASES)
-    def test_identity(self, im_shape, _, as_tensor_input):
-        data = self.get_data(im_shape, as_tensor_input)
+    def test_identity(self, im_shape, input_type):
+        data = self.get_data(im_shape, input_type)
         alpha = 0.0
         t = GibbsNoised(KEYS, alpha)
         out = t(deepcopy(data))
         for k in KEYS:
+            self.assertEqual(type(out[k]), type(data[k]))
+            if isinstance(out[k], torch.Tensor):
+                self.assertEqual(out[k].device, data[k].device)
+                out[k], data[k] = out[k].cpu(), data[k].cpu()
             np.testing.assert_allclose(data[k], out[k], atol=1e-2)
 
     @parameterized.expand(TEST_CASES)
-    def test_alpha_1(self, im_shape, _, as_tensor_input):
-        data = self.get_data(im_shape, as_tensor_input)
+    def test_alpha_1(self, im_shape, input_type):
+        data = self.get_data(im_shape, input_type)
         alpha = 1.0
         t = GibbsNoised(KEYS, alpha)
         out = t(deepcopy(data))
         for k in KEYS:
-            np.testing.assert_allclose(0 * data[k], out[k])
+            self.assertEqual(type(out[k]), type(data[k]))
+            if isinstance(out[k], torch.Tensor):
+                self.assertEqual(out[k].device, data[k].device)
+                out[k], data[k] = out[k].cpu(), data[k].cpu()
+            np.testing.assert_allclose(0.0 * data[k], out[k], atol=1e-2)
 
     @parameterized.expand(TEST_CASES)
-    def test_dict_matches(self, im_shape, _, as_tensor_input):
-        data = self.get_data(im_shape, as_tensor_input)
+    def test_dict_matches(self, im_shape, input_type):
+        data = self.get_data(im_shape, input_type)
         data = {KEYS[0]: deepcopy(data[KEYS[0]]), KEYS[1]: deepcopy(data[KEYS[0]])}
         alpha = 1.0
         t = GibbsNoised(KEYS, alpha)
         out = t(deepcopy(data))
-        np.testing.assert_allclose(out[KEYS[0]], out[KEYS[1]])
+        torch.testing.assert_allclose(out[KEYS[0]], out[KEYS[1]], rtol=1e-7, atol=0)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_global_mutual_information_loss.py b/tests/test_global_mutual_information_loss.py
index 3373b59621..6a658563bc 100644
--- a/tests/test_global_mutual_information_loss.py
+++ b/tests/test_global_mutual_information_loss.py
@@ -16,6 +16,7 @@
 from parameterized import parameterized
 
 from monai.losses.image_dissimilarity import GlobalMutualInformationLoss
+from tests.utils import SkipIfBeforePyTorchVersion
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
@@ -45,6 +46,31 @@
         },
         -1.083999,
     ],
+    [
+        {"kernel_type": "b-spline"},
+        {
+            "pred": torch.arange(0, 3, dtype=torch.float, device=device)[None, :, None, None, None]
+            .expand(1, 3, 3, 3, 3)
+            .div(3),
+            "target": torch.arange(0, 3, dtype=torch.float, device=device)[None, :, None, None, None]
+            .expand(1, 3, 3, 3, 3)
+            .div(3),
+        },
+        -1.0986018,
+    ],
+    [
+        {"kernel_type": "b-spline"},
+        {
+            "pred": torch.arange(0, 3, dtype=torch.float, device=device)[None, :, None, None, None]
+            .expand(1, 3, 3, 3, 3)
+            .div(3),
+            "target": torch.arange(0, 3, dtype=torch.float, device=device)[None, :, None, None, None]
+            .expand(1, 3, 3, 3, 3)
+            .div(3)
+            ** 2,
+        },
+        -1.09861,
+    ],
     [
         {},
         {
@@ -85,9 +111,10 @@
 
 class TestGlobalMutualInformationLoss(unittest.TestCase):
     @parameterized.expand(TEST_CASES)
+    @SkipIfBeforePyTorchVersion((1, 9))
     def test_shape(self, input_param, input_data, expected_val):
         result = GlobalMutualInformationLoss(**input_param).forward(**input_data)
-        np.testing.assert_allclose(result.detach().cpu().numpy(), expected_val, rtol=1e-4)
+        np.testing.assert_allclose(result.detach().cpu().numpy(), expected_val, rtol=1e-3, atol=1e-3)
 
     def test_ill_shape(self):
         loss = GlobalMutualInformationLoss()
diff --git a/tests/test_grid_pull.py b/tests/test_grid_pull.py
index 9e4d2e8237..cbfd8a9590 100644
--- a/tests/test_grid_pull.py
+++ b/tests/test_grid_pull.py
@@ -85,7 +85,7 @@ def test_grid_pull(self, input_param, expected):
             grads = grads[0]
         else:
             grads = torch.cat(grads, dim=0)
-        self.assertTrue("{}".format(result.device).startswith(expected["device"]))
+        self.assertTrue(f"{result.device}".startswith(expected["device"]))
         np.testing.assert_allclose(result.detach().cpu().numpy(), expected["val"].cpu().numpy(), rtol=1e-4, atol=1e-4)
         np.testing.assert_allclose(grads.detach().cpu().numpy(), expected["grad"].cpu().numpy(), rtol=1e-4, atol=1e-4)
 
diff --git a/tests/test_handler_classification_saver.py b/tests/test_handler_classification_saver.py
index 87ce5ca3f8..e06c6e95f0 100644
--- a/tests/test_handler_classification_saver.py
+++ b/tests/test_handler_classification_saver.py
@@ -45,7 +45,7 @@ def _train_func(engine, batch):
             def _test_file(filename):
                 filepath = os.path.join(tempdir, filename)
                 self.assertTrue(os.path.exists(filepath))
-                with open(filepath, "r") as f:
+                with open(filepath) as f:
                     reader = csv.reader(f)
                     i = 0
                     for row in reader:
diff --git a/tests/test_handler_classification_saver_dist.py b/tests/test_handler_classification_saver_dist.py
index 70cc0ca42f..d9bbe67ecd 100644
--- a/tests/test_handler_classification_saver_dist.py
+++ b/tests/test_handler_classification_saver_dist.py
@@ -61,7 +61,7 @@ def _train_func(engine, batch):
             filepath = os.path.join(tempdir, "predictions.csv")
             if rank == 1:
                 self.assertTrue(os.path.exists(filepath))
-                with open(filepath, "r") as f:
+                with open(filepath) as f:
                     reader = csv.reader(f)
                     i = 0
                     for row in reader:
diff --git a/tests/test_handler_parameter_scheduler.py b/tests/test_handler_parameter_scheduler.py
index 5b3e845ace..55ea6a4af2 100644
--- a/tests/test_handler_parameter_scheduler.py
+++ b/tests/test_handler_parameter_scheduler.py
@@ -9,7 +9,7 @@
 
 class ToyNet(Module):
     def __init__(self, value):
-        super(ToyNet, self).__init__()
+        super().__init__()
         self.value = value
 
     def forward(self, input):
diff --git a/tests/test_handler_stats.py b/tests/test_handler_stats.py
index 84cdef59a8..9b7ad19dcc 100644
--- a/tests/test_handler_stats.py
+++ b/tests/test_handler_stats.py
@@ -140,7 +140,7 @@ def _train_func(engine, batch):
             engine.run(range(3), max_epochs=2)
             handler.close()
             stats_handler.logger.removeHandler(handler)
-            with open(filename, "r") as f:
+            with open(filename) as f:
                 output_str = f.read()
                 grep = re.compile(f".*{key_to_handler}.*")
                 has_key_word = re.compile(f".*{key_to_print}.*")
diff --git a/tests/test_handler_transform_inverter.py b/tests/test_handler_transform_inverter.py
deleted file mode 100644
index 385311eba7..0000000000
--- a/tests/test_handler_transform_inverter.py
+++ /dev/null
@@ -1,152 +0,0 @@
-# Copyright 2020 - 2021 MONAI Consortium
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#     http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import sys
-import unittest
-
-import numpy as np
-import torch
-from ignite.engine import Engine
-
-from monai.data import CacheDataset, DataLoader, create_test_image_3d, decollate_batch
-from monai.engines.utils import IterationEvents
-from monai.handlers import TransformInverter
-from monai.transforms import (
-    AddChanneld,
-    CastToTyped,
-    Compose,
-    CopyItemsd,
-    LoadImaged,
-    Orientationd,
-    RandAffined,
-    RandAxisFlipd,
-    RandFlipd,
-    RandRotate90d,
-    RandRotated,
-    RandZoomd,
-    ResizeWithPadOrCropd,
-    ScaleIntensityd,
-    Spacingd,
-    ToTensord,
-)
-from monai.utils.misc import set_determinism
-from tests.utils import make_nifti_image
-
-KEYS = ["image", "label"]
-
-
-class TestTransformInverter(unittest.TestCase):
-    def test_invert(self):
-        set_determinism(seed=0)
-        im_fname, seg_fname = [make_nifti_image(i) for i in create_test_image_3d(101, 100, 107, noise_max=100)]
-        transform = Compose(
-            [
-                LoadImaged(KEYS),
-                AddChanneld(KEYS),
-                Orientationd(KEYS, "RPS"),
-                Spacingd(KEYS, pixdim=(1.2, 1.01, 0.9), mode=["bilinear", "nearest"], dtype=np.float32),
-                ScaleIntensityd("image", minv=1, maxv=10),
-                RandFlipd(KEYS, prob=0.5, spatial_axis=[1, 2]),
-                RandAxisFlipd(KEYS, prob=0.5),
-                RandRotate90d(KEYS, spatial_axes=(1, 2)),
-                RandZoomd(KEYS, prob=0.5, min_zoom=0.5, max_zoom=1.1, keep_size=True),
-                RandRotated(KEYS, prob=0.5, range_x=np.pi, mode="bilinear", align_corners=True),
-                RandAffined(KEYS, prob=0.5, rotate_range=np.pi, mode="nearest"),
-                ResizeWithPadOrCropd(KEYS, 100),
-                ToTensord("image"),  # test to support both Tensor and Numpy array when inverting
-                CastToTyped(KEYS, dtype=[torch.uint8, np.uint8]),
-                CopyItemsd("label", times=2, names=["label_inverted1", "label_inverted2"]),
-                CopyItemsd("image", times=2, names=["image_inverted1", "image_inverted2"]),
-            ]
-        )
-        data = [{"image": im_fname, "label": seg_fname} for _ in range(12)]
-
-        # num workers = 0 for mac or gpu transforms
-        num_workers = 0 if sys.platform == "darwin" or torch.cuda.is_available() else 2
-
-        dataset = CacheDataset(data, transform=transform, progress=False)
-        loader = DataLoader(dataset, num_workers=num_workers, batch_size=5)
-
-        # set up engine
-        def _train_func(engine, batch):
-            self.assertTupleEqual(batch["image"].shape[1:], (1, 100, 100, 100))
-            engine.state.output = engine.state.batch = decollate_batch(batch)
-            engine.fire_event(IterationEvents.MODEL_COMPLETED)
-            return engine.state.output
-
-        engine = Engine(_train_func)
-        engine.register_events(*IterationEvents)
-
-        # set up testing handler
-        TransformInverter(
-            transform=transform,
-            output_keys=["image_inverted1", "label_inverted1"],
-            batch_keys="label",
-            meta_keys=["image_inverted1_meta_dict", "label_inverted1_meta_dict"],
-            batch_meta_keys="label_meta_dict",
-            nearest_interp=True,
-            to_tensor=[True, False],
-            device="cpu",
-        ).attach(engine)
-
-        # test different nearest interpolation values
-        TransformInverter(
-            transform=transform,
-            output_keys=["image_inverted2", "label_inverted2"],
-            batch_keys="image",
-            meta_keys=None,
-            batch_meta_keys="image_meta_dict",
-            meta_key_postfix="meta_dict",
-            nearest_interp=[True, False],
-            post_func=[lambda x: x + 10, lambda x: x],
-        ).attach(engine)
-
-        engine.run(loader, max_epochs=1)
-        set_determinism(seed=None)
-
-        for output in engine.state.output:
-            self.assertTupleEqual(output["image"].shape, (1, 100, 100, 100))
-            self.assertTupleEqual(output["label"].shape, (1, 100, 100, 100))
-            # check the nearest inerpolation mode
-            i = output["image_inverted1"]
-            torch.testing.assert_allclose(i.to(torch.uint8).to(torch.float), i.to(torch.float))
-            self.assertTupleEqual(i.shape, (1, 100, 101, 107))
-            i = output["label_inverted1"]
-            np.testing.assert_allclose(i.astype(np.uint8).astype(np.float32), i.astype(np.float32))
-            self.assertTupleEqual(i.shape, (1, 100, 101, 107))
-
-            # check the case that different items use different interpolation mode to invert transforms
-            d = output["image_inverted2"]
-            # if the interpolation mode is nearest, accumulated diff should be smaller than 1
-            self.assertLess(torch.sum(d.to(torch.float) - d.to(torch.uint8).to(torch.float)).item(), 1.0)
-            self.assertTupleEqual(d.shape, (1, 100, 101, 107))
-
-            d = output["label_inverted2"]
-            # if the interpolation mode is not nearest, accumulated diff should be greater than 10000
-            self.assertGreater(torch.sum(d.to(torch.float) - d.to(torch.uint8).to(torch.float)).item(), 10000.0)
-            self.assertTupleEqual(d.shape, (1, 100, 101, 107))
-
-        # check labels match
-        reverted = engine.state.output[-1]["label_inverted1"].astype(np.int32)
-        original = LoadImaged(KEYS)(data[-1])["label"]
-        n_good = np.sum(np.isclose(reverted, original, atol=1e-3))
-        reverted_name = engine.state.batch[-1]["label_inverted1_meta_dict"]["filename_or_obj"]
-        original_name = data[-1]["label"]
-        self.assertEqual(reverted_name, original_name)
-        print("invert diff", reverted.size - n_good)
-        # 25300: 2 workers (cpu, non-macos)
-        # 1812: 0 workers (gpu or macos)
-        # 1824: torch 1.5.1
-        self.assertTrue((reverted.size - n_good) in (25300, 1812, 1824), "diff. in 3 possible values")
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/test_histogram_normalize.py b/tests/test_histogram_normalize.py
index b69fb1d927..e0178166d9 100644
--- a/tests/test_histogram_normalize.py
+++ b/tests/test_histogram_normalize.py
@@ -15,28 +15,37 @@
 from parameterized import parameterized
 
 from monai.transforms import HistogramNormalize
-
-TEST_CASE_1 = [
-    {"num_bins": 4, "min": 1, "max": 5, "mask": np.array([1, 1, 1, 1, 1, 0])},
-    np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]),
-    np.array([1.0, 1.5, 2.5, 4.0, 5.0, 5.0]),
-]
-
-TEST_CASE_2 = [
-    {"num_bins": 4, "max": 4, "dtype": np.uint8},
-    np.array([0.0, 1.0, 2.0, 3.0, 4.0]),
-    np.array([0, 0, 1, 3, 4]),
-]
-
-TEST_CASE_3 = [
-    {"num_bins": 256, "max": 255, "dtype": np.uint8},
-    np.array([[[100.0, 200.0], [150.0, 250.0]]]),
-    np.array([[[0, 170], [70, 255]]]),
-]
+from tests.utils import TEST_NDARRAYS
+
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
+        [
+            {"num_bins": 4, "min": 1, "max": 5, "mask": np.array([1, 1, 1, 1, 1, 0])},
+            p(np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0])),
+            np.array([1.0, 1.5, 2.5, 4.0, 5.0, 5.0]),
+        ]
+    )
+
+    TESTS.append(
+        [
+            {"num_bins": 4, "max": 4, "dtype": np.uint8},
+            p(np.array([0.0, 1.0, 2.0, 3.0, 4.0])),
+            np.array([0, 0, 1, 3, 4]),
+        ]
+    )
+
+    TESTS.append(
+        [
+            {"num_bins": 256, "max": 255, "dtype": np.uint8},
+            p(np.array([[[100.0, 200.0], [150.0, 250.0]]])),
+            np.array([[[0, 170], [70, 255]]]),
+        ]
+    )
 
 
 class TestHistogramNormalize(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
+    @parameterized.expand(TESTS)
     def test_value(self, argments, image, expected_data):
         result = HistogramNormalize(**argments)(image)
         np.testing.assert_allclose(result, expected_data)
diff --git a/tests/test_histogram_normalized.py b/tests/test_histogram_normalized.py
index 68647e82fb..314c7bd75b 100644
--- a/tests/test_histogram_normalized.py
+++ b/tests/test_histogram_normalized.py
@@ -15,28 +15,37 @@
 from parameterized import parameterized
 
 from monai.transforms import HistogramNormalized
-
-TEST_CASE_1 = [
-    {"keys": "img", "num_bins": 4, "min": 1, "max": 5, "mask_key": "mask"},
-    {"img": np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0]), "mask": np.array([1, 1, 1, 1, 1, 0])},
-    np.array([1.0, 1.5, 2.5, 4.0, 5.0, 5.0]),
-]
-
-TEST_CASE_2 = [
-    {"keys": "img", "num_bins": 4, "max": 4, "dtype": np.uint8},
-    {"img": np.array([0.0, 1.0, 2.0, 3.0, 4.0])},
-    np.array([0, 0, 1, 3, 4]),
-]
-
-TEST_CASE_3 = [
-    {"keys": "img", "num_bins": 256, "max": 255, "dtype": np.uint8},
-    {"img": np.array([[[100.0, 200.0], [150.0, 250.0]]])},
-    np.array([[[0, 170], [70, 255]]]),
-]
+from tests.utils import TEST_NDARRAYS
+
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
+        [
+            {"keys": "img", "num_bins": 4, "min": 1, "max": 5, "mask_key": "mask"},
+            {"img": p(np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0])), "mask": p(np.array([1, 1, 1, 1, 1, 0]))},
+            np.array([1.0, 1.5, 2.5, 4.0, 5.0, 5.0]),
+        ]
+    )
+
+    TESTS.append(
+        [
+            {"keys": "img", "num_bins": 4, "max": 4, "dtype": np.uint8},
+            {"img": p(np.array([0.0, 1.0, 2.0, 3.0, 4.0]))},
+            np.array([0, 0, 1, 3, 4]),
+        ]
+    )
+
+    TESTS.append(
+        [
+            {"keys": "img", "num_bins": 256, "max": 255, "dtype": np.uint8},
+            {"img": p(np.array([[[100.0, 200.0], [150.0, 250.0]]]))},
+            np.array([[[0, 170], [70, 255]]]),
+        ]
+    )
 
 
 class TestHistogramNormalized(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
+    @parameterized.expand(TESTS)
     def test_value(self, argments, image, expected_data):
         result = HistogramNormalized(**argments)(image)["img"]
         np.testing.assert_allclose(result, expected_data)
diff --git a/tests/test_integration_classification_2d.py b/tests/test_integration_classification_2d.py
index 03b5571973..7a94780f82 100644
--- a/tests/test_integration_classification_2d.py
+++ b/tests/test_integration_classification_2d.py
@@ -197,7 +197,7 @@ def setUp(self):
 
         assert os.path.exists(data_dir)
 
-        class_names = sorted((x for x in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, x))))
+        class_names = sorted(x for x in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, x)))
         image_files = [
             [os.path.join(data_dir, class_name, x) for x in sorted(os.listdir(os.path.join(data_dir, class_name)))]
             for class_name in class_names
diff --git a/tests/test_integration_determinism.py b/tests/test_integration_determinism.py
index e077420420..7725990763 100644
--- a/tests/test_integration_determinism.py
+++ b/tests/test_integration_determinism.py
@@ -41,7 +41,7 @@ def __len__(self):
             return train_steps
 
     net = UNet(
-        dimensions=2, in_channels=1, out_channels=1, channels=(4, 8, 16, 32), strides=(2, 2, 2), num_res_units=2
+        spatial_dims=2, in_channels=1, out_channels=1, channels=(4, 8, 16, 32), strides=(2, 2, 2), num_res_units=2
     ).to(device)
 
     loss = DiceLoss(sigmoid=True)
diff --git a/tests/test_integration_segmentation_3d.py b/tests/test_integration_segmentation_3d.py
index d5eb69f7af..215a5b3f9a 100644
--- a/tests/test_integration_segmentation_3d.py
+++ b/tests/test_integration_segmentation_3d.py
@@ -100,7 +100,7 @@ def run_training_test(root_dir, device="cuda:0", cachedataset=0, readers=(None,
 
     # create UNet, DiceLoss and Adam optimizer
     model = monai.networks.nets.UNet(
-        dimensions=3,
+        spatial_dims=3,
         in_channels=1,
         out_channels=1,
         channels=(16, 32, 64, 128, 256),
@@ -199,7 +199,7 @@ def run_inference_test(root_dir, device="cuda:0"):
     dice_metric = DiceMetric(include_background=True, reduction="mean", get_not_nans=False)
 
     model = UNet(
-        dimensions=3,
+        spatial_dims=3,
         in_channels=1,
         out_channels=1,
         channels=(16, 32, 64, 128, 256),
diff --git a/tests/test_integration_sliding_window.py b/tests/test_integration_sliding_window.py
index b63f331ba6..0522bf080e 100644
--- a/tests/test_integration_sliding_window.py
+++ b/tests/test_integration_sliding_window.py
@@ -34,7 +34,7 @@ def run_test(batch_size, img_name, seg_name, output_dir, device="cuda:0"):
     loader = DataLoader(ds, batch_size=1, pin_memory=torch.cuda.is_available())
 
     net = UNet(
-        dimensions=3, in_channels=1, out_channels=1, channels=(4, 8, 16, 32), strides=(2, 2, 2), num_res_units=2
+        spatial_dims=3, in_channels=1, out_channels=1, channels=(4, 8, 16, 32), strides=(2, 2, 2), num_res_units=2
     ).to(device)
     roi_size = (16, 32, 48)
     sw_batch_size = batch_size
diff --git a/tests/test_integration_stn.py b/tests/test_integration_stn.py
index c1fcfe7a89..998eacbf41 100644
--- a/tests/test_integration_stn.py
+++ b/tests/test_integration_stn.py
@@ -9,7 +9,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from __future__ import print_function
 
 import unittest
 
diff --git a/tests/test_integration_unet_2d.py b/tests/test_integration_unet_2d.py
index a46a174dc9..88e6d7e795 100644
--- a/tests/test_integration_unet_2d.py
+++ b/tests/test_integration_unet_2d.py
@@ -32,10 +32,10 @@ def __len__(self):
             return train_steps
 
     if net_name == "basicunet":
-        net = BasicUNet(dimensions=2, in_channels=1, out_channels=1, features=(4, 8, 8, 16, 16, 32))
+        net = BasicUNet(spatial_dims=2, in_channels=1, out_channels=1, features=(4, 8, 8, 16, 16, 32))
     elif net_name == "unet":
         net = UNet(
-            dimensions=2, in_channels=1, out_channels=1, channels=(4, 8, 16, 32), strides=(2, 2, 2), num_res_units=2
+            spatial_dims=2, in_channels=1, out_channels=1, channels=(4, 8, 16, 32), strides=(2, 2, 2), num_res_units=2
         )
     net.to(device)
 
diff --git a/tests/test_integration_workflows.py b/tests/test_integration_workflows.py
index 7fcc0b4064..4a3e027993 100644
--- a/tests/test_integration_workflows.py
+++ b/tests/test_integration_workflows.py
@@ -98,7 +98,7 @@ def run_training_test(root_dir, device="cuda:0", amp=False, num_workers=4):
 
     # create UNet, DiceLoss and Adam optimizer
     net = monai.networks.nets.UNet(
-        dimensions=3,
+        spatial_dims=3,
         in_channels=1,
         out_channels=1,
         channels=(16, 32, 64, 128, 256),
@@ -230,7 +230,7 @@ def run_inference_test(root_dir, model_file, device="cuda:0", amp=False, num_wor
 
     # create UNet, DiceLoss and Adam optimizer
     net = monai.networks.nets.UNet(
-        dimensions=3,
+        spatial_dims=3,
         in_channels=1,
         out_channels=1,
         channels=(16, 32, 64, 128, 256),
diff --git a/tests/test_intensity_stats.py b/tests/test_intensity_stats.py
index 059271e442..92a2c04585 100644
--- a/tests/test_intensity_stats.py
+++ b/tests/test_intensity_stats.py
@@ -31,7 +31,7 @@
 ]
 
 TEST_CASE_3 = [
-    {"ops": [lambda x: np.mean(x), "max", lambda x: np.min(x)], "key_prefix": "orig"},
+    {"ops": [np.mean, "max", np.min], "key_prefix": "orig"},
     np.array([[[0.0, 1.0], [2.0, 3.0]]]),
     None,
     {"orig_custom_0": 1.5, "orig_max": 3.0, "orig_custom_1": 0.0},
diff --git a/tests/test_intensity_statsd.py b/tests/test_intensity_statsd.py
index 8c8bc8795a..596c80deb5 100644
--- a/tests/test_intensity_statsd.py
+++ b/tests/test_intensity_statsd.py
@@ -34,7 +34,7 @@
 ]
 
 TEST_CASE_3 = [
-    {"keys": "img", "ops": [lambda x: np.mean(x), "max", lambda x: np.min(x)], "key_prefix": "orig"},
+    {"keys": "img", "ops": [np.mean, "max", np.min], "key_prefix": "orig"},
     {"img": np.array([[[0.0, 1.0], [2.0, 3.0]]])},
     "img_meta_dict",
     {"orig_custom_0": 1.5, "orig_max": 3.0, "orig_custom_1": 0.0},
diff --git a/tests/test_inverse.py b/tests/test_inverse.py
index f2470d47fd..b09f532fd1 100644
--- a/tests/test_inverse.py
+++ b/tests/test_inverse.py
@@ -566,8 +566,8 @@ def setUp(self):
                 "other": np.array(im_1d, copy=True),
             }
 
-        im_2d_fname, seg_2d_fname = [make_nifti_image(i) for i in create_test_image_2d(101, 100)]
-        im_3d_fname, seg_3d_fname = [make_nifti_image(i, affine) for i in create_test_image_3d(100, 101, 107)]
+        im_2d_fname, seg_2d_fname = (make_nifti_image(i) for i in create_test_image_2d(101, 100))
+        im_3d_fname, seg_3d_fname = (make_nifti_image(i, affine) for i in create_test_image_3d(100, 101, 107))
 
         load_ims = Compose([LoadImaged(KEYS), AddChanneld(KEYS)])
         self.all_data["2D"] = load_ims({"image": im_2d_fname, "label": seg_2d_fname})
@@ -666,7 +666,7 @@ def test_inverse_inferred_seg(self, extra_transform):
 
         device = "cuda" if torch.cuda.is_available() else "cpu"
         model = UNet(
-            dimensions=2,
+            spatial_dims=2,
             in_channels=1,
             out_channels=1,
             channels=(2, 4),
diff --git a/tests/test_inverse_collation.py b/tests/test_inverse_collation.py
index c302e04017..b986f21bce 100644
--- a/tests/test_inverse_collation.py
+++ b/tests/test_inverse_collation.py
@@ -48,7 +48,12 @@
     for t in [
         RandFlipd(keys=KEYS, prob=0.5, spatial_axis=[1, 2]),
         RandAxisFlipd(keys=KEYS, prob=0.5),
-        RandRotate90d(keys=KEYS, spatial_axes=(1, 2)),
+        Compose(
+            [
+                RandRotate90d(keys=KEYS, spatial_axes=(1, 2)),
+                ToTensord(keys=KEYS),
+            ]
+        ),
         RandZoomd(keys=KEYS, prob=0.5, min_zoom=0.5, max_zoom=1.1, keep_size=True),
         RandRotated(keys=KEYS, prob=0.5, range_x=np.pi),
         RandAffined(
@@ -56,7 +61,6 @@
             prob=0.5,
             rotate_range=np.pi,
             device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
-            as_tensor_output=False,
         ),
     ]
 ]
@@ -67,7 +71,12 @@
     for t in [
         RandFlipd(keys=KEYS, prob=0.5, spatial_axis=[1]),
         RandAxisFlipd(keys=KEYS, prob=0.5),
-        RandRotate90d(keys=KEYS, prob=0.5, spatial_axes=(0, 1)),
+        Compose(
+            [
+                RandRotate90d(keys=KEYS, prob=0.5, spatial_axes=(0, 1)),
+                ToTensord(keys=KEYS),
+            ]
+        ),
         RandZoomd(keys=KEYS, prob=0.5, min_zoom=0.5, max_zoom=1.1, keep_size=True),
         RandRotated(keys=KEYS, prob=0.5, range_x=np.pi),
         RandAffined(
@@ -75,7 +84,6 @@
             prob=0.5,
             rotate_range=np.pi,
             device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
-            as_tensor_output=False,
         ),
     ]
 ]
@@ -91,12 +99,12 @@ def setUp(self):
         set_determinism(seed=0)
 
         b_size = 11
-        im_fname, seg_fname = [make_nifti_image(i) for i in create_test_image_3d(101, 100, 107)]
+        im_fname, seg_fname = (make_nifti_image(i) for i in create_test_image_3d(101, 100, 107))
         load_ims = Compose([LoadImaged(KEYS), AddChanneld(KEYS)])
         self.data_3d = [load_ims({"image": im_fname, "label": seg_fname}) for _ in range(b_size)]
 
         b_size = 8
-        im_fname, seg_fname = [make_nifti_image(i) for i in create_test_image_2d(62, 37, rad_max=10)]
+        im_fname, seg_fname = (make_nifti_image(i) for i in create_test_image_2d(62, 37, rad_max=10))
         load_ims = Compose([LoadImaged(KEYS), AddChanneld(KEYS)])
         self.data_2d = [load_ims({"image": im_fname, "label": seg_fname}) for _ in range(b_size)]
 
@@ -107,10 +115,7 @@ def tearDown(self):
 
     @parameterized.expand(TESTS_2D + TESTS_3D)
     def test_collation(self, _, transform, collate_fn, ndim):
-        if ndim == 3:
-            data = self.data_3d
-        else:
-            data = self.data_2d
+        data = self.data_3d if ndim == 3 else self.data_2d
         if collate_fn:
             modified_transform = transform
         else:
diff --git a/tests/test_invertd.py b/tests/test_invertd.py
index 5b98653f0a..8c2066a167 100644
--- a/tests/test_invertd.py
+++ b/tests/test_invertd.py
@@ -34,6 +34,7 @@
     ResizeWithPadOrCropd,
     ScaleIntensityd,
     Spacingd,
+    ToTensord,
 )
 from monai.utils.misc import set_determinism
 from tests.utils import make_nifti_image
@@ -44,7 +45,7 @@
 class TestInvertd(unittest.TestCase):
     def test_invert(self):
         set_determinism(seed=0)
-        im_fname, seg_fname = [make_nifti_image(i) for i in create_test_image_3d(101, 100, 107, noise_max=100)]
+        im_fname, seg_fname = (make_nifti_image(i) for i in create_test_image_3d(101, 100, 107, noise_max=100))
         transform = Compose(
             [
                 LoadImaged(KEYS),
@@ -63,8 +64,10 @@ def test_invert(self):
                 CopyItemsd("image_meta_dict", times=1, names="test_dict"),
                 # test to support Tensor, Numpy array and dictionary when inverting
                 EnsureTyped(keys=["image", "test_dict"]),
+                ToTensord("image"),
                 CastToTyped(KEYS, dtype=[torch.uint8, np.uint8]),
-                CopyItemsd("label", times=1, names="label_inverted"),
+                CopyItemsd("label", times=2, names=["label_inverted", "label_inverted1"]),
+                CopyItemsd("image", times=2, names=["image_inverted", "image_inverted1"]),
             ]
         )
         data = [{"image": im_fname, "label": seg_fname} for _ in range(12)]
@@ -76,25 +79,58 @@ def test_invert(self):
         loader = DataLoader(dataset, num_workers=num_workers, batch_size=5)
         inverter = Invertd(
             # `image` was not copied, invert the original value directly
-            keys=["image", "label_inverted", "test_dict"],
+            keys=["image_inverted", "label_inverted", "test_dict"],
             transform=transform,
             orig_keys=["label", "label", "test_dict"],
-            meta_keys=["image_meta_dict", "label_inverted_meta_dict", None],
+            meta_keys=["image_inverted_meta_dict", "label_inverted_meta_dict", None],
             orig_meta_keys=["label_meta_dict", "label_meta_dict", None],
             nearest_interp=True,
             to_tensor=[True, False, False],
             device="cpu",
         )
 
+        inverter_1 = Invertd(
+            # `image` was not copied, invert the original value directly
+            keys=["image_inverted1", "label_inverted1"],
+            transform=transform,
+            orig_keys=["image", "image"],
+            meta_keys=["image_inverted1_meta_dict", "label_inverted1_meta_dict"],
+            orig_meta_keys=["image_meta_dict", "image_meta_dict"],
+            nearest_interp=[True, False],
+            to_tensor=[True, True],
+            device="cpu",
+        )
+
+        expected_keys = [
+            "image",
+            "image_inverted",
+            "image_inverted1",
+            "image_inverted1_meta_dict",
+            "image_inverted_meta_dict",
+            "image_meta_dict",
+            "image_transforms",
+            "label",
+            "label_inverted",
+            "label_inverted1",
+            "label_inverted1_meta_dict",
+            "label_inverted_meta_dict",
+            "label_meta_dict",
+            "label_transforms",
+            "test_dict",
+            "test_dict_transforms",
+        ]
         # execute 1 epoch
         for d in loader:
             d = decollate_batch(d)
             for item in d:
                 item = inverter(item)
-                # this unit test only covers basic function, test_handler_transform_inverter covers more
+                item = inverter_1(item)
+
+                self.assertListEqual(sorted(item), expected_keys)
+                self.assertTupleEqual(item["image"].shape[1:], (100, 100, 100))
                 self.assertTupleEqual(item["label"].shape[1:], (100, 100, 100))
-                # check the nearest inerpolation mode
-                i = item["image"]
+                # check the nearest interpolation mode
+                i = item["image_inverted"]
                 torch.testing.assert_allclose(i.to(torch.uint8).to(torch.float), i.to(torch.float))
                 self.assertTupleEqual(i.shape[1:], (100, 101, 107))
                 i = item["label_inverted"]
@@ -104,6 +140,30 @@ def test_invert(self):
                 self.assertTrue(isinstance(item["test_dict"]["affine"], np.ndarray))
                 self.assertTrue(isinstance(item["test_dict"]["filename_or_obj"], str))
 
+                # check the case that different items use different interpolation mode to invert transforms
+                d = item["image_inverted1"]
+                # if the interpolation mode is nearest, accumulated diff should be smaller than 1
+                self.assertLess(torch.sum(d.to(torch.float) - d.to(torch.uint8).to(torch.float)).item(), 1.0)
+                self.assertTupleEqual(d.shape, (1, 100, 101, 107))
+
+                d = item["label_inverted1"]
+                # if the interpolation mode is not nearest, accumulated diff should be greater than 10000
+                self.assertGreater(torch.sum(d.to(torch.float) - d.to(torch.uint8).to(torch.float)).item(), 10000.0)
+                self.assertTupleEqual(d.shape, (1, 100, 101, 107))
+
+        # check labels match
+        reverted = item["label_inverted"].astype(np.int32)
+        original = LoadImaged(KEYS)(data[-1])["label"]
+        n_good = np.sum(np.isclose(reverted, original, atol=1e-3))
+        reverted_name = item["label_inverted_meta_dict"]["filename_or_obj"]
+        original_name = data[-1]["label"]
+        self.assertEqual(reverted_name, original_name)
+        print("invert diff", reverted.size - n_good)
+        # 25300: 2 workers (cpu, non-macos)
+        # 1812: 0 workers (gpu or macos)
+        # 1824: torch 1.5.1
+        self.assertTrue((reverted.size - n_good) in (25300, 1812, 1824), "diff. in 3 possible values")
+
         set_determinism(seed=None)
 
 
diff --git a/tests/test_k_space_spike_noise.py b/tests/test_k_space_spike_noise.py
index bb6d05e676..66763f286f 100644
--- a/tests/test_k_space_spike_noise.py
+++ b/tests/test_k_space_spike_noise.py
@@ -20,17 +20,14 @@
 from monai.data.synthetic import create_test_image_2d, create_test_image_3d
 from monai.transforms import KSpaceSpikeNoise
 from monai.utils.misc import set_determinism
-from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule
+from tests.utils import TEST_NDARRAYS
 
-TEST_CASES = []
+TESTS = []
 for shape in ((128, 64), (64, 48, 80)):
-    for as_tensor_output in (True, False):
-        for as_tensor_input in (True, False):
-            TEST_CASES.append((shape, as_tensor_output, as_tensor_input))
+    for p in TEST_NDARRAYS:
+        TESTS.append((shape, p))
 
 
-@SkipIfBeforePyTorchVersion((1, 8))
-@SkipIfNoModule("torch.fft")
 class TestKSpaceSpikeNoise(unittest.TestCase):
     def setUp(self):
         set_determinism(0)
@@ -40,34 +37,44 @@ def tearDown(self):
         set_determinism(None)
 
     @staticmethod
-    def get_data(im_shape, as_tensor_input):
+    def get_data(im_shape, im_type):
         create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d
-        im = create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5)[0][None]
-        return torch.Tensor(im) if as_tensor_input else im
+        im, _ = create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5)
+        return im_type(im[None])
 
-    @parameterized.expand(TEST_CASES)
-    def test_same_result(self, im_shape, as_tensor_output, as_tensor_input):
+    @parameterized.expand(TESTS)
+    def test_same_result(self, im_shape, im_type):
 
-        im = self.get_data(im_shape, as_tensor_input)
+        im = self.get_data(im_shape, im_type)
         loc = [0, int(im.shape[1] / 2), 0] if len(im_shape) == 2 else [0, int(im.shape[1] / 2), 0, 0]
         k_intensity = 10
-        t = KSpaceSpikeNoise(loc, k_intensity, as_tensor_output)
+        t = KSpaceSpikeNoise(loc, k_intensity)
 
         out1 = t(deepcopy(im))
         out2 = t(deepcopy(im))
 
+        self.assertEqual(type(im), type(out1))
+        if isinstance(out1, torch.Tensor):
+            self.assertEqual(im.device, out1.device)
+            out1 = out1.cpu()
+            out2 = out2.cpu()
+
         np.testing.assert_allclose(out1, out2)
-        self.assertIsInstance(out1, torch.Tensor if as_tensor_output else np.ndarray)
 
-    @parameterized.expand(TEST_CASES)
-    def test_highlighted_kspace_pixel(self, im_shape, as_tensor_output, as_tensor_input):
+    @parameterized.expand(TESTS)
+    def test_highlighted_kspace_pixel(self, im_shape, as_tensor_input):
 
         im = self.get_data(im_shape, as_tensor_input)
         loc = [0, int(im.shape[1] / 2), 0] if len(im_shape) == 2 else [0, int(im.shape[1] / 2), 0, 0]
         k_intensity = 10
-        t = KSpaceSpikeNoise(loc, k_intensity, as_tensor_output)
+        t = KSpaceSpikeNoise(loc, k_intensity)
         out = t(im)
 
+        self.assertEqual(type(im), type(out))
+        if isinstance(out, torch.Tensor):
+            self.assertEqual(im.device, out.device)
+            out = out.cpu()
+
         n_dims = len(im_shape)
         out_k = fftshift(fftn(out, axes=tuple(range(-n_dims, 0))), axes=tuple(range(-n_dims, 0)))
         log_mag = np.log(np.absolute(out_k))
diff --git a/tests/test_k_space_spike_noised.py b/tests/test_k_space_spike_noised.py
index 616662b3cd..3fa6a394f3 100644
--- a/tests/test_k_space_spike_noised.py
+++ b/tests/test_k_space_spike_noised.py
@@ -20,19 +20,16 @@
 from monai.data.synthetic import create_test_image_2d, create_test_image_3d
 from monai.transforms import KSpaceSpikeNoised
 from monai.utils.misc import set_determinism
-from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule
+from tests.utils import TEST_NDARRAYS
 
-TEST_CASES = []
+TESTS = []
 for shape in ((128, 64), (64, 48, 80)):
-    for as_tensor_output in (True, False):
-        for as_tensor_input in (True, False):
-            TEST_CASES.append((shape, as_tensor_output, as_tensor_input))
+    for p in TEST_NDARRAYS:
+        TESTS.append((shape, p))
 
 KEYS = ["image", "label"]
 
 
-@SkipIfBeforePyTorchVersion((1, 8))
-@SkipIfNoModule("torch.fft")
 class TestKSpaceSpikeNoised(unittest.TestCase):
     def setUp(self):
         set_determinism(0)
@@ -42,55 +39,69 @@ def tearDown(self):
         set_determinism(None)
 
     @staticmethod
-    def get_data(im_shape, as_tensor_input):
+    def get_data(im_shape, im_type):
         create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d
         ims = create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5)
-        ims = [im[None] for im in ims]
-        ims = [torch.Tensor(im) for im in ims] if as_tensor_input else ims
-        return dict(zip(KEYS, ims))
+        ims = [im_type(im[None]) for im in ims]
+        return {k: v for k, v in zip(KEYS, ims)}
 
-    @parameterized.expand(TEST_CASES)
-    def test_same_result(self, im_shape, as_tensor_output, as_tensor_input):
+    @parameterized.expand(TESTS)
+    def test_same_result(self, im_shape, im_type):
 
-        data = self.get_data(im_shape, as_tensor_input)
+        data = self.get_data(im_shape, im_type)
         loc = [0] + [int(im_shape[i] / 2) for i in range(len(im_shape))]
         k_intensity = 10
 
-        t = KSpaceSpikeNoised(KEYS, loc, k_intensity, as_tensor_output)
+        t = KSpaceSpikeNoised(KEYS, loc, k_intensity)
         out1 = t(deepcopy(data))
         out2 = t(deepcopy(data))
 
         for k in KEYS:
+            self.assertEqual(type(out1[k]), type(data[k]))
+            if isinstance(out1[k], torch.Tensor):
+                self.assertEqual(out1[k].device, data[k].device)
+                out1[k] = out1[k].cpu()
+                out2[k] = out2[k].cpu()
             np.testing.assert_allclose(out1[k], out2[k])
-            self.assertIsInstance(out1[k], torch.Tensor if as_tensor_output else np.ndarray)
 
-    @parameterized.expand(TEST_CASES)
-    def test_highlighted_kspace_pixel(self, im_shape, as_tensor_output, as_tensor_input):
+    @parameterized.expand(TESTS)
+    def test_highlighted_kspace_pixel(self, im_shape, im_type):
 
-        data = self.get_data(im_shape, as_tensor_input)
+        data = self.get_data(im_shape, im_type)
         loc = [0] + [int(im_shape[i] / 2) for i in range(len(im_shape))]
         k_intensity = 10
 
-        t = KSpaceSpikeNoised(KEYS, loc, k_intensity, as_tensor_output)
+        t = KSpaceSpikeNoised(KEYS, loc, k_intensity)
         out = t(data)
 
         for k in KEYS:
+            self.assertEqual(type(out[k]), type(data[k]))
+            if isinstance(out[k], torch.Tensor):
+                self.assertEqual(out[k].device, data[k].device)
+                out[k] = out[k].cpu()
+
             n_dims = len(im_shape)
             out_k = fftshift(fftn(out[k], axes=tuple(range(-n_dims, 0))), axes=tuple(range(-n_dims, 0)))
             log_mag = np.log(np.absolute(out_k))
             np.testing.assert_allclose(k_intensity, log_mag[tuple(loc)], 1e-1)
 
-    @parameterized.expand(TEST_CASES)
-    def test_dict_matches(self, im_shape, _, as_tensor_input):
-        data = self.get_data(im_shape, as_tensor_input)
+    @parameterized.expand(TESTS)
+    def test_dict_matches(self, im_shape, im_type):
+        data = self.get_data(im_shape, im_type)
         # use same image for both dictionary entries to check same trans is applied to them
         data = {KEYS[0]: deepcopy(data[KEYS[0]]), KEYS[1]: deepcopy(data[KEYS[0]])}
         loc = [0] + [int(im_shape[i] / 2) for i in range(len(im_shape))]
         k_intensity = 10
 
-        t = KSpaceSpikeNoised(KEYS, loc, k_intensity, as_tensor_output)
+        t = KSpaceSpikeNoised(KEYS, loc, k_intensity)
         out = t(deepcopy(data))
 
+        for k in KEYS:
+            self.assertEqual(type(out[k]), type(data[k]))
+            if isinstance(out[k], torch.Tensor):
+                self.assertEqual(out[k].device, data[k].device)
+                out[k] = out[k].cpu()
+
         np.testing.assert_allclose(out[KEYS[0]], out[KEYS[1]])
 
 
diff --git a/tests/test_label_to_mask.py b/tests/test_label_to_mask.py
index 9caa7252f3..6c8f935fbc 100644
--- a/tests/test_label_to_mask.py
+++ b/tests/test_label_to_mask.py
@@ -64,7 +64,7 @@ def test_value(self, argments, image, expected_data):
         self.assertEqual(type(result), type(image))
         if isinstance(result, torch.Tensor):
             self.assertEqual(result.device, image.device)
-        assert_allclose(result, expected_data)
+        assert_allclose(result, expected_data, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_label_to_maskd.py b/tests/test_label_to_maskd.py
index b8f0d3c171..b2073e8ac3 100644
--- a/tests/test_label_to_maskd.py
+++ b/tests/test_label_to_maskd.py
@@ -65,7 +65,7 @@ def test_value(self, argments, input_data, expected_data):
         self.assertEqual(type(r), type(i))
         if isinstance(r, torch.Tensor):
             self.assertEqual(r.device, i.device)
-        assert_allclose(r, expected_data)
+        assert_allclose(r, expected_data, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_lesion_froc.py b/tests/test_lesion_froc.py
index 2454de88fa..4a67c8d0b3 100644
--- a/tests/test_lesion_froc.py
+++ b/tests/test_lesion_froc.py
@@ -19,18 +19,19 @@
 from monai.apps.pathology.metrics import LesionFROC
 from monai.utils import optional_import
 
-_, has_cucim = optional_import("cucim")
+_cucim, has_cucim = optional_import("cucim")
+has_cucim = has_cucim and hasattr(_cucim, "CuImage")
 _, has_skimage = optional_import("skimage.measure")
 _, has_sp = optional_import("scipy.ndimage")
-PILImage, has_pil = optional_import("PIL.Image")
+imwrite, has_tif = optional_import("tifffile", name="imwrite")
 
 
 def save_as_tif(filename, array):
     array = array[::-1, ...]  # Upside-down
-    img = PILImage.fromarray(array)
     if not filename.endswith(".tif"):
         filename += ".tif"
-    img.save(os.path.join("tests", "testing_data", filename))
+    file_path = os.path.join("tests", "testing_data", filename)
+    imwrite(file_path, array, compress="jpeg", tile=(16, 16))
 
 
 def around(val, interval=3):
@@ -301,7 +302,7 @@ class TestEvaluateTumorFROC(unittest.TestCase):
     @skipUnless(has_cucim, "Requires cucim")
     @skipUnless(has_skimage, "Requires skimage")
     @skipUnless(has_sp, "Requires scipy")
-    @skipUnless(has_pil, "Requires PIL")
+    @skipUnless(has_tif, "Requires tifffile")
     def setUp(self):
         prepare_test_data()
 
diff --git a/tests/test_lltm.py b/tests/test_lltm.py
index f1311379bc..4186c91246 100644
--- a/tests/test_lltm.py
+++ b/tests/test_lltm.py
@@ -15,7 +15,9 @@
 from parameterized import parameterized
 
 from monai.networks.layers import LLTM
-from tests.utils import SkipIfNoModule
+from tests.utils import SkipIfNoModule, is_tf32_env
+
+_rtol = 0.001 if is_tf32_env() else 0.0001
 
 TEST_CASE_1 = [
     {"input_features": 32, "state_size": 2},
@@ -50,8 +52,8 @@ def test_value_cuda(self, input_param, expected_h, expected_c):
         new_h, new_c = lltm(x, (h, c))
         (new_h.sum() + new_c.sum()).backward()
 
-        torch.testing.assert_allclose(new_h, expected_h.to(device), rtol=0.0001, atol=1e-04)
-        torch.testing.assert_allclose(new_c, expected_c.to(device), rtol=0.0001, atol=1e-04)
+        torch.testing.assert_allclose(new_h, expected_h.to(device), rtol=_rtol, atol=0.001)
+        torch.testing.assert_allclose(new_c, expected_c.to(device), rtol=_rtol, atol=0.001)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_lr_scheduler.py b/tests/test_lr_scheduler.py
index aa126f7848..20fd7ca8cf 100644
--- a/tests/test_lr_scheduler.py
+++ b/tests/test_lr_scheduler.py
@@ -19,7 +19,7 @@
 
 class SchedulerTestNet(torch.nn.Module):
     def __init__(self):
-        super(SchedulerTestNet, self).__init__()
+        super().__init__()
         self.conv1 = torch.nn.Conv2d(1, 1, 1)
         self.conv2 = torch.nn.Conv2d(1, 1, 1)
 
@@ -47,11 +47,11 @@ def test_shape(self, input_param, expected_lr):
         self.assertEqual(len([scheduler.get_last_lr()[0]]), 1)
         lrs_1 = []
         for _ in range(input_param["t_total"]):
-            lrs_1.append(float("{:.3f}".format(scheduler.get_last_lr()[0])))
+            lrs_1.append(float(f"{scheduler.get_last_lr()[0]:.3f}"))
             optimizer.step()
             scheduler.step()
         for a, b in zip(lrs_1, expected_lr):
-            self.assertEqual(a, b, msg="LR is wrong ! expected {}, got {}".format(b, a))
+            self.assertEqual(a, b, msg=f"LR is wrong ! expected {b}, got {a}")
 
 
 if __name__ == "__main__":
diff --git a/tests/test_map_binary_to_indices.py b/tests/test_map_binary_to_indices.py
index 1fafa6f446..2d29aa7c0d 100644
--- a/tests/test_map_binary_to_indices.py
+++ b/tests/test_map_binary_to_indices.py
@@ -15,50 +15,58 @@
 from parameterized import parameterized
 
 from monai.transforms import map_binary_to_indices
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [
-    {"label": np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]), "image": None, "image_threshold": 0.0},
-    np.array([1, 2, 3, 5, 6, 7]),
-    np.array([0, 4, 8]),
-]
-
-TEST_CASE_2 = [
-    {
-        "label": np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]),
-        "image": np.array([[[1, 1, 1], [1, 0, 1], [1, 1, 1]]]),
-        "image_threshold": 0.0,
-    },
-    np.array([1, 2, 3, 5, 6, 7]),
-    np.array([0, 8]),
-]
-
-TEST_CASE_3 = [
-    {
-        "label": np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]]),
-        "image": np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]),
-        "image_threshold": 1.0,
-    },
-    np.array([1, 2, 3, 5, 6, 7]),
-    np.array([0, 8]),
-]
-
-TEST_CASE_4 = [
-    {
-        "label": np.array([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]]),
-        "image": np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]]),
-        "image_threshold": 1.0,
-    },
-    np.array([1, 2, 3, 5, 6, 7]),
-    np.array([0, 8]),
-]
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
+        [
+            {"label": p(np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]])), "image": None, "image_threshold": 0.0},
+            np.array([1, 2, 3, 5, 6, 7]),
+            np.array([0, 4, 8]),
+        ]
+    )
+    TESTS.append(
+        [
+            {
+                "label": p(np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]])),
+                "image": p(np.array([[[1, 1, 1], [1, 0, 1], [1, 1, 1]]])),
+                "image_threshold": 0.0,
+            },
+            np.array([1, 2, 3, 5, 6, 7]),
+            np.array([0, 8]),
+        ]
+    )
+    TESTS.append(
+        [
+            {
+                "label": p(np.array([[[0, 1, 1], [1, 0, 1], [1, 1, 0]]])),
+                "image": p(np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])),
+                "image_threshold": 1.0,
+            },
+            np.array([1, 2, 3, 5, 6, 7]),
+            np.array([0, 8]),
+        ]
+    )
+    TESTS.append(
+        [
+            {
+                "label": p(np.array([[[0, 1, 2], [3, 0, 4], [5, 6, 0]]])),
+                "image": p(np.array([[[3, 3, 3], [3, 1, 3], [3, 3, 3]]])),
+                "image_threshold": 1.0,
+            },
+            np.array([1, 2, 3, 5, 6, 7]),
+            np.array([0, 8]),
+        ]
+    )
 
 
 class TestMapBinaryToIndices(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4])
+    @parameterized.expand(TESTS)
     def test_type_shape(self, input_data, expected_fg, expected_bg):
         fg_indices, bg_indices = map_binary_to_indices(**input_data)
-        np.testing.assert_allclose(fg_indices, expected_fg)
-        np.testing.assert_allclose(bg_indices, expected_bg)
+        assert_allclose(fg_indices, expected_fg, type_test=False)
+        assert_allclose(bg_indices, expected_bg, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_map_classes_to_indices.py b/tests/test_map_classes_to_indices.py
index 2320954520..a585bd006b 100644
--- a/tests/test_map_classes_to_indices.py
+++ b/tests/test_map_classes_to_indices.py
@@ -15,86 +15,145 @@
 from parameterized import parameterized
 
 from monai.transforms import map_classes_to_indices
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [
-    # test Argmax data
-    {"label": np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]), "num_classes": 3, "image": None, "image_threshold": 0.0},
-    [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7])],
-]
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
+        [
+            # test Argmax data
+            {
+                "label": p(np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])),
+                "num_classes": 3,
+                "image": None,
+                "image_threshold": 0.0,
+            },
+            [
+                np.array([0, 4, 8]),
+                np.array([1, 5, 6]),
+                np.array([2, 3, 7]),
+            ],
+        ]
+    )
 
-TEST_CASE_2 = [
-    {
-        "label": np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]),
-        "num_classes": 3,
-        "image": np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]),
-        "image_threshold": 60,
-    },
-    [np.array([0, 8]), np.array([1, 5, 6]), np.array([3])],
-]
+    TESTS.append(
+        [
+            {
+                "label": p(np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])),
+                "num_classes": 3,
+                "image": p(np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]])),
+                "image_threshold": 60,
+            },
+            [
+                np.array([0, 8]),
+                np.array([1, 5, 6]),
+                np.array([3]),
+            ],
+        ]
+    )
 
-TEST_CASE_3 = [
-    # test One-Hot data
-    {
-        "label": np.array(
+    TESTS.append(
+        [
+            # test One-Hot data
+            {
+                "label": p(
+                    np.array(
+                        [
+                            [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                            [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
+                            [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
+                        ]
+                    )
+                ),
+                "image": None,
+                "image_threshold": 0.0,
+            },
             [
-                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
-                [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
-                [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
-            ]
-        ),
-        "image": None,
-        "image_threshold": 0.0,
-    },
-    [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7])],
-]
+                np.array([0, 4, 8]),
+                np.array([1, 5, 6]),
+                np.array([2, 3, 7]),
+            ],
+        ]
+    )
 
-TEST_CASE_4 = [
-    {
-        "label": np.array(
+    TESTS.append(
+        [
+            {
+                "label": p(
+                    np.array(
+                        [
+                            [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                            [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
+                            [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
+                        ]
+                    )
+                ),
+                "num_classes": None,
+                "image": p(np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]])),
+                "image_threshold": 60,
+            },
             [
-                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
-                [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
-                [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
-            ]
-        ),
-        "num_classes": None,
-        "image": np.array([[[132, 1434, 51], [61, 0, 133], [523, 44, 232]]]),
-        "image_threshold": 60,
-    },
-    [np.array([0, 8]), np.array([1, 5, 6]), np.array([3])],
-]
+                np.array([0, 8]),
+                np.array([1, 5, 6]),
+                np.array([3]),
+            ],
+        ]
+    )
 
-TEST_CASE_5 = [
-    # test empty class
-    {"label": np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]]), "num_classes": 5, "image": None, "image_threshold": 0.0},
-    [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7]), np.array([]), np.array([])],
-]
+    TESTS.append(
+        [
+            # test empty class
+            {
+                "label": p(np.array([[[0, 1, 2], [2, 0, 1], [1, 2, 0]]])),
+                "num_classes": 5,
+                "image": None,
+                "image_threshold": 0.0,
+            },
+            [
+                np.array([0, 4, 8]),
+                np.array([1, 5, 6]),
+                np.array([2, 3, 7]),
+                np.array([]),
+                np.array([]),
+            ],
+        ]
+    )
 
-TEST_CASE_6 = [
-    # test empty class
-    {
-        "label": np.array(
+    TESTS.append(
+        [
+            # test empty class
+            {
+                "label": p(
+                    np.array(
+                        [
+                            [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
+                            [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
+                            [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
+                            [[0, 0, 0], [0, 0, 0], [0, 0, 0]],
+                            [[0, 0, 0], [0, 0, 0], [0, 0, 0]],
+                        ]
+                    )
+                ),
+                "image": None,
+                "image_threshold": 0.0,
+            },
             [
-                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
-                [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
-                [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
-                [[0, 0, 0], [0, 0, 0], [0, 0, 0]],
-                [[0, 0, 0], [0, 0, 0], [0, 0, 0]],
-            ]
-        ),
-        "image": None,
-        "image_threshold": 0.0,
-    },
-    [np.array([0, 4, 8]), np.array([1, 5, 6]), np.array([2, 3, 7]), np.array([]), np.array([])],
-]
+                np.array([0, 4, 8]),
+                np.array([1, 5, 6]),
+                np.array([2, 3, 7]),
+                np.array([]),
+                np.array([]),
+            ],
+        ]
+    )
 
 
 class TestMapClassesToIndices(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4, TEST_CASE_5, TEST_CASE_6])
+    @parameterized.expand(TESTS)
     def test_value(self, input_data, expected_indices):
         indices = map_classes_to_indices(**input_data)
         for i, e in zip(indices, expected_indices):
-            np.testing.assert_allclose(i, e)
+            assert_allclose(i, e, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_masked_inference_wsi_dataset.py b/tests/test_masked_inference_wsi_dataset.py
index 361c17e106..927ba9de5f 100644
--- a/tests/test_masked_inference_wsi_dataset.py
+++ b/tests/test_masked_inference_wsi_dataset.py
@@ -22,10 +22,10 @@
 from monai.utils import optional_import
 from tests.utils import skip_if_quick
 
-_, has_cim = optional_import("cucim")
+_, has_cim = optional_import("cucim", name="CuImage")
 _, has_osl = optional_import("openslide")
 
-FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff"
+FILE_URL = "https://drive.google.com/uc?id=1sGTKZlJBIz53pfqTxoTqiIQzIoEzHLAe"
 base_name, extension = os.path.splitext(os.path.basename(FILE_URL))
 FILE_NAME = "temp_" + base_name
 FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", FILE_NAME + extension)
diff --git a/tests/test_mmar_download.py b/tests/test_mmar_download.py
index 6952e62c3c..725a6a8823 100644
--- a/tests/test_mmar_download.py
+++ b/tests/test_mmar_download.py
@@ -138,7 +138,7 @@ def test_load_ckpt(self, input_args, expected_name, expected_val):
 
     def test_unique(self):
         # model ids are unique
-        keys = sorted([m["id"] for m in MODEL_DESC])
+        keys = sorted(m["id"] for m in MODEL_DESC)
         self.assertTrue(keys == sorted(set(keys)))
 
     @SkipIfAtLeastPyTorchVersion((1, 6))
diff --git a/tests/test_nifti_rw.py b/tests/test_nifti_rw.py
index f16d80659c..ff7f11e47f 100644
--- a/tests/test_nifti_rw.py
+++ b/tests/test_nifti_rw.py
@@ -19,54 +19,66 @@
 
 from monai.data import write_nifti
 from monai.transforms import LoadImage, Orientation, Spacing
-from tests.utils import make_nifti_image
-
-TEST_IMAGE = np.arange(24).reshape((2, 4, 3))
-TEST_AFFINE = np.array(
-    [[-5.3, 0.0, 0.0, 102.01], [0.0, 0.52, 2.17, -7.50], [-0.0, 1.98, -0.26, -23.12], [0.0, 0.0, 0.0, 1.0]]
-)
-
-TEST_CASES = [
-    [
-        TEST_IMAGE,
-        TEST_AFFINE,
-        dict(reader="NibabelReader", image_only=False, as_closest_canonical=True),
-        np.arange(24).reshape((2, 4, 3)),
-    ],
-    [
-        TEST_IMAGE,
-        TEST_AFFINE,
-        dict(reader="NibabelReader", image_only=True, as_closest_canonical=True),
-        np.array(
+from tests.utils import TEST_NDARRAYS, assert_allclose, make_nifti_image
+
+TESTS = []
+for p in TEST_NDARRAYS:
+    for q in TEST_NDARRAYS:
+        TEST_IMAGE = p(np.arange(24).reshape((2, 4, 3)))
+        TEST_AFFINE = q(
+            np.array(
+                [[-5.3, 0.0, 0.0, 102.01], [0.0, 0.52, 2.17, -7.50], [-0.0, 1.98, -0.26, -23.12], [0.0, 0.0, 0.0, 1.0]]
+            )
+        )
+        TESTS.append(
+            [
+                TEST_IMAGE,
+                TEST_AFFINE,
+                dict(reader="NibabelReader", image_only=False, as_closest_canonical=True),
+                np.arange(24).reshape((2, 4, 3)),
+            ]
+        )
+        TESTS.append(
+            [
+                TEST_IMAGE,
+                TEST_AFFINE,
+                dict(reader="NibabelReader", image_only=True, as_closest_canonical=True),
+                np.array(
+                    [
+                        [[12.0, 15.0, 18.0, 21.0], [13.0, 16.0, 19.0, 22.0], [14.0, 17.0, 20.0, 23.0]],
+                        [[0.0, 3.0, 6.0, 9.0], [1.0, 4.0, 7.0, 10.0], [2.0, 5.0, 8.0, 11.0]],
+                    ]
+                ),
+            ]
+        )
+        TESTS.append(
             [
-                [[12.0, 15.0, 18.0, 21.0], [13.0, 16.0, 19.0, 22.0], [14.0, 17.0, 20.0, 23.0]],
-                [[0.0, 3.0, 6.0, 9.0], [1.0, 4.0, 7.0, 10.0], [2.0, 5.0, 8.0, 11.0]],
+                TEST_IMAGE,
+                TEST_AFFINE,
+                dict(reader="NibabelReader", image_only=True, as_closest_canonical=False),
+                np.arange(24).reshape((2, 4, 3)),
             ]
-        ),
-    ],
-    [
-        TEST_IMAGE,
-        TEST_AFFINE,
-        dict(reader="NibabelReader", image_only=True, as_closest_canonical=False),
-        np.arange(24).reshape((2, 4, 3)),
-    ],
-    [
-        TEST_IMAGE,
-        TEST_AFFINE,
-        dict(reader="NibabelReader", image_only=False, as_closest_canonical=False),
-        np.arange(24).reshape((2, 4, 3)),
-    ],
-    [
-        TEST_IMAGE,
-        None,
-        dict(reader="NibabelReader", image_only=False, as_closest_canonical=False),
-        np.arange(24).reshape((2, 4, 3)),
-    ],
-]
+        )
+        TESTS.append(
+            [
+                TEST_IMAGE,
+                TEST_AFFINE,
+                dict(reader="NibabelReader", image_only=False, as_closest_canonical=False),
+                np.arange(24).reshape((2, 4, 3)),
+            ]
+        )
+        TESTS.append(
+            [
+                TEST_IMAGE,
+                None,
+                dict(reader="NibabelReader", image_only=False, as_closest_canonical=False),
+                np.arange(24).reshape((2, 4, 3)),
+            ]
+        )
 
 
 class TestNiftiLoadRead(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_orientation(self, array, affine, reader_param, expected):
         test_image = make_nifti_image(array, affine)
 
@@ -93,8 +105,8 @@ def test_orientation(self, array, affine, reader_param, expected):
             os.remove(test_image)
 
         if affine is not None:
-            np.testing.assert_allclose(saved_affine, affine)
-        np.testing.assert_allclose(saved_data, expected)
+            assert_allclose(saved_affine, affine, type_test=False)
+        assert_allclose(saved_data, expected, type_test=False)
 
     def test_consistency(self):
         np.set_printoptions(suppress=True, precision=3)
@@ -140,69 +152,81 @@ def test_consistency(self):
     def test_write_2d(self):
         with tempfile.TemporaryDirectory() as out_dir:
             image_name = os.path.join(out_dir, "test.nii.gz")
-            img = np.arange(6).reshape((2, 3))
-            write_nifti(img, image_name, affine=np.diag([1]), target_affine=np.diag([1.4]))
-            out = nib.load(image_name)
-            np.testing.assert_allclose(out.get_fdata(), [[0, 1, 2], [3.0, 4, 5]])
-            np.testing.assert_allclose(out.affine, np.diag([1.4, 1, 1, 1]))
-
-            image_name = os.path.join(out_dir, "test1.nii.gz")
-            img = np.arange(5).reshape((1, 5))
-            write_nifti(img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 1, 3, 5]))
-            out = nib.load(image_name)
-            np.testing.assert_allclose(out.get_fdata(), [[0, 2, 4]])
-            np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 1, 1]))
+            for p in TEST_NDARRAYS:
+                img = p(np.arange(6).reshape((2, 3)))
+                write_nifti(img, image_name, affine=np.diag([1]), target_affine=np.diag([1.4]))
+                out = nib.load(image_name)
+                np.testing.assert_allclose(out.get_fdata(), [[0, 1, 2], [3.0, 4, 5]])
+                np.testing.assert_allclose(out.affine, np.diag([1.4, 1, 1, 1]))
+
+                image_name = os.path.join(out_dir, "test1.nii.gz")
+                img = np.arange(5).reshape((1, 5))
+                write_nifti(
+                    img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 1, 3, 5])
+                )
+                out = nib.load(image_name)
+                np.testing.assert_allclose(out.get_fdata(), [[0, 2, 4]])
+                np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 1, 1]))
 
     def test_write_3d(self):
         with tempfile.TemporaryDirectory() as out_dir:
             image_name = os.path.join(out_dir, "test.nii.gz")
-            img = np.arange(6).reshape((1, 2, 3))
-            write_nifti(img, image_name, affine=np.diag([1]), target_affine=np.diag([1.4]))
-            out = nib.load(image_name)
-            np.testing.assert_allclose(out.get_fdata(), [[[0, 1, 2], [3, 4, 5]]])
-            np.testing.assert_allclose(out.affine, np.diag([1.4, 1, 1, 1]))
-
-            image_name = os.path.join(out_dir, "test1.nii.gz")
-            img = np.arange(5).reshape((1, 1, 5))
-            write_nifti(img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 2, 3, 5]))
-            out = nib.load(image_name)
-            np.testing.assert_allclose(out.get_fdata(), [[[0, 2, 4]]])
-            np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 2, 1]))
+            for p in TEST_NDARRAYS:
+                img = p(np.arange(6).reshape((1, 2, 3)))
+                write_nifti(img, image_name, affine=np.diag([1]), target_affine=np.diag([1.4]))
+                out = nib.load(image_name)
+                np.testing.assert_allclose(out.get_fdata(), [[[0, 1, 2], [3, 4, 5]]])
+                np.testing.assert_allclose(out.affine, np.diag([1.4, 1, 1, 1]))
+
+                image_name = os.path.join(out_dir, "test1.nii.gz")
+                img = p(np.arange(5).reshape((1, 1, 5)))
+                write_nifti(
+                    img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 2, 3, 5])
+                )
+                out = nib.load(image_name)
+                np.testing.assert_allclose(out.get_fdata(), [[[0, 2, 4]]])
+                np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 2, 1]))
 
     def test_write_4d(self):
         with tempfile.TemporaryDirectory() as out_dir:
             image_name = os.path.join(out_dir, "test.nii.gz")
-            img = np.arange(6).reshape((1, 1, 3, 2))
-            write_nifti(img, image_name, affine=np.diag([1.4, 1]), target_affine=np.diag([1, 1.4, 1]))
-            out = nib.load(image_name)
-            np.testing.assert_allclose(out.get_fdata(), [[[[0, 1], [2, 3], [4, 5]]]])
-            np.testing.assert_allclose(out.affine, np.diag([1, 1.4, 1, 1]))
-
-            image_name = os.path.join(out_dir, "test1.nii.gz")
-            img = np.arange(5).reshape((1, 1, 5, 1))
-            write_nifti(img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 2, 3, 5]))
-            out = nib.load(image_name)
-            np.testing.assert_allclose(out.get_fdata(), [[[[0], [2], [4]]]])
-            np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 2, 1]))
+            for p in TEST_NDARRAYS:
+                img = p(np.arange(6).reshape((1, 1, 3, 2)))
+                write_nifti(img, image_name, affine=np.diag([1.4, 1]), target_affine=np.diag([1, 1.4, 1]))
+                out = nib.load(image_name)
+                np.testing.assert_allclose(out.get_fdata(), [[[[0, 1], [2, 3], [4, 5]]]])
+                np.testing.assert_allclose(out.affine, np.diag([1, 1.4, 1, 1]))
+
+                image_name = os.path.join(out_dir, "test1.nii.gz")
+                img = p(np.arange(5).reshape((1, 1, 5, 1)))
+                write_nifti(
+                    img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 2, 3, 5])
+                )
+                out = nib.load(image_name)
+                np.testing.assert_allclose(out.get_fdata(), [[[[0], [2], [4]]]])
+                np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 2, 1]))
 
     def test_write_5d(self):
         with tempfile.TemporaryDirectory() as out_dir:
             image_name = os.path.join(out_dir, "test.nii.gz")
-            img = np.arange(12).reshape((1, 1, 3, 2, 2))
-            write_nifti(img, image_name, affine=np.diag([1]), target_affine=np.diag([1.4]))
-            out = nib.load(image_name)
-            np.testing.assert_allclose(
-                out.get_fdata(),
-                np.array([[[[[0.0, 1.0], [2.0, 3.0]], [[4.0, 5.0], [6.0, 7.0]], [[8.0, 9.0], [10.0, 11.0]]]]]),
-            )
-            np.testing.assert_allclose(out.affine, np.diag([1.4, 1, 1, 1]))
-
-            image_name = os.path.join(out_dir, "test1.nii.gz")
-            img = np.arange(10).reshape((1, 1, 5, 1, 2))
-            write_nifti(img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 2, 3, 5]))
-            out = nib.load(image_name)
-            np.testing.assert_allclose(out.get_fdata(), np.array([[[[[0.0, 1.0]], [[4.0, 5.0]], [[8.0, 9.0]]]]]))
-            np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 2, 1]))
+            for p in TEST_NDARRAYS:
+                img = p(np.arange(12).reshape((1, 1, 3, 2, 2)))
+                write_nifti(img, image_name, affine=np.diag([1]), target_affine=np.diag([1.4]))
+                out = nib.load(image_name)
+                np.testing.assert_allclose(
+                    out.get_fdata(),
+                    np.array([[[[[0.0, 1.0], [2.0, 3.0]], [[4.0, 5.0], [6.0, 7.0]], [[8.0, 9.0], [10.0, 11.0]]]]]),
+                )
+                np.testing.assert_allclose(out.affine, np.diag([1.4, 1, 1, 1]))
+
+                image_name = os.path.join(out_dir, "test1.nii.gz")
+                img = p(np.arange(10).reshape((1, 1, 5, 1, 2)))
+                write_nifti(
+                    img, image_name, affine=np.diag([1, 1, 1, 3, 3]), target_affine=np.diag([1.4, 2.0, 2, 3, 5])
+                )
+                out = nib.load(image_name)
+                np.testing.assert_allclose(out.get_fdata(), np.array([[[[[0.0, 1.0]], [[4.0, 5.0]], [[8.0, 9.0]]]]]))
+                np.testing.assert_allclose(out.affine, np.diag([1.4, 2, 2, 1]))
 
 
 if __name__ == "__main__":
diff --git a/tests/test_normalize_intensity.py b/tests/test_normalize_intensity.py
index 2755eb4c25..41c6b053ec 100644
--- a/tests/test_normalize_intensity.py
+++ b/tests/test_normalize_intensity.py
@@ -31,51 +31,51 @@
                         "divisor": u(np.array([0.5, 0.5, 0.5, 0.5])),
                         "nonzero": True,
                     },
-                    np.array([0.0, 3.0, 0.0, 4.0]),
-                    np.array([0.0, -1.0, 0.0, 1.0]),
+                    p(np.array([0.0, 3.0, 0.0, 4.0])),
+                    p(np.array([0.0, -1.0, 0.0, 1.0])),
                 ]
             )
-    TESTS.append([p, {"nonzero": True}, np.array([0.0, 0.0, 0.0, 0.0]), np.array([0.0, 0.0, 0.0, 0.0])])
-    TESTS.append([p, {"nonzero": False}, np.array([0.0, 0.0, 0.0, 0.0]), np.array([0.0, 0.0, 0.0, 0.0])])
-    TESTS.append([p, {"nonzero": False}, np.array([1, 1, 1, 1]), np.array([0.0, 0.0, 0.0, 0.0])])
+    TESTS.append([p, {"nonzero": True}, p(np.array([0.0, 0.0, 0.0, 0.0])), p(np.array([0.0, 0.0, 0.0, 0.0]))])
+    TESTS.append([p, {"nonzero": False}, p(np.array([0.0, 0.0, 0.0, 0.0])), p(np.array([0.0, 0.0, 0.0, 0.0]))])
+    TESTS.append([p, {"nonzero": False}, p(np.array([1, 1, 1, 1])), p(np.array([0.0, 0.0, 0.0, 0.0]))])
     TESTS.append(
         [
             p,
             {"nonzero": False, "channel_wise": True, "subtrahend": [1, 2, 3]},
-            np.ones((3, 2, 2)),
-            np.array([[[0.0, 0.0], [0.0, 0.0]], [[-1.0, -1.0], [-1.0, -1.0]], [[-2.0, -2.0], [-2.0, -2.0]]]),
+            p(np.ones((3, 2, 2))),
+            p(np.array([[[0.0, 0.0], [0.0, 0.0]], [[-1.0, -1.0], [-1.0, -1.0]], [[-2.0, -2.0], [-2.0, -2.0]]])),
         ]
     )
     TESTS.append(
         [
             p,
             {"nonzero": True, "channel_wise": True, "subtrahend": [1, 2, 3], "divisor": [0, 0, 2]},
-            np.ones((3, 2, 2)),
-            np.array([[[0.0, 0.0], [0.0, 0.0]], [[-1.0, -1.0], [-1.0, -1.0]], [[-1.0, -1.0], [-1.0, -1.0]]]),
+            p(np.ones((3, 2, 2))),
+            p(np.array([[[0.0, 0.0], [0.0, 0.0]], [[-1.0, -1.0], [-1.0, -1.0]], [[-1.0, -1.0], [-1.0, -1.0]]])),
         ]
     )
     TESTS.append(
         [
             p,
             {"nonzero": True, "channel_wise": False, "subtrahend": 2, "divisor": 0},
-            np.ones((3, 2, 2)),
-            np.ones((3, 2, 2)) * -1.0,
+            p(np.ones((3, 2, 2))),
+            p(np.ones((3, 2, 2)) * -1.0),
         ]
     )
     TESTS.append(
         [
             p,
             {"nonzero": True, "channel_wise": False, "subtrahend": np.ones((3, 2, 2)) * 0.5, "divisor": 0},
-            np.ones((3, 2, 2)),
-            np.ones((3, 2, 2)) * 0.5,
+            p(np.ones((3, 2, 2))),
+            p(np.ones((3, 2, 2)) * 0.5),
         ]
     )
     TESTS.append(
         [
             p,
             {"nonzero": True, "channel_wise": True, "subtrahend": np.ones((3, 2, 2)) * 0.5, "divisor": [0, 1, 0]},
-            np.ones((3, 2, 2)),
-            np.ones((3, 2, 2)) * 0.5,
+            p(np.ones((3, 2, 2))),
+            p(np.ones((3, 2, 2)) * 0.5),
         ]
     )
 
@@ -91,17 +91,14 @@ def test_default(self, im_type):
             self.assertEqual(im.device, normalized.device)
         self.assertTrue(normalized.dtype in (np.float32, torch.float32))
         expected = (self.imt - np.mean(self.imt)) / np.std(self.imt)
-        assert_allclose(expected, normalized, rtol=1e-3)
+        assert_allclose(normalized, expected, type_test=False, rtol=1e-3)
 
     @parameterized.expand(TESTS)
     def test_nonzero(self, in_type, input_param, input_data, expected_data):
         normalizer = NormalizeIntensity(**input_param)
         im = in_type(input_data)
         normalized = normalizer(im)
-        self.assertEqual(type(im), type(normalized))
-        if isinstance(normalized, torch.Tensor):
-            self.assertEqual(im.device, normalized.device)
-        assert_allclose(expected_data, normalized)
+        assert_allclose(normalized, in_type(expected_data))
 
     @parameterized.expand([[p] for p in TEST_NDARRAYS])
     def test_channel_wise(self, im_type):
@@ -109,10 +106,7 @@ def test_channel_wise(self, im_type):
         input_data = im_type(np.array([[0.0, 3.0, 0.0, 4.0], [0.0, 4.0, 0.0, 5.0]]))
         expected = np.array([[0.0, -1.0, 0.0, 1.0], [0.0, -1.0, 0.0, 1.0]])
         normalized = normalizer(input_data)
-        self.assertEqual(type(input_data), type(normalized))
-        if isinstance(normalized, torch.Tensor):
-            self.assertEqual(input_data.device, normalized.device)
-        assert_allclose(expected, normalized)
+        assert_allclose(normalized, im_type(expected))
 
     @parameterized.expand([[p] for p in TEST_NDARRAYS])
     def test_value_errors(self, im_type):
diff --git a/tests/test_normalize_intensityd.py b/tests/test_normalize_intensityd.py
index e2cec5407a..60b1d05456 100644
--- a/tests/test_normalize_intensityd.py
+++ b/tests/test_normalize_intensityd.py
@@ -25,7 +25,7 @@
             [
                 {"keys": ["img"], "nonzero": True},
                 {"img": p(np.array([0.0, 3.0, 0.0, 4.0]))},
-                np.array([0.0, -1.0, 0.0, 1.0]),
+                p(np.array([0.0, -1.0, 0.0, 1.0])),
             ]
         )
         TESTS.append(
@@ -37,14 +37,14 @@
                     "nonzero": True,
                 },
                 {"img": p(np.array([0.0, 3.0, 0.0, 4.0]))},
-                np.array([0.0, -1.0, 0.0, 1.0]),
+                p(np.array([0.0, -1.0, 0.0, 1.0])),
             ]
         )
         TESTS.append(
             [
                 {"keys": ["img"], "nonzero": True},
                 {"img": p(np.array([0.0, 0.0, 0.0, 0.0]))},
-                np.array([0.0, 0.0, 0.0, 0.0]),
+                p(np.array([0.0, 0.0, 0.0, 0.0])),
             ]
         )
 
@@ -60,7 +60,7 @@ def test_image_normalize_intensityd(self, im_type):
         self.assertEqual(type(im), type(normalized))
         if isinstance(normalized, torch.Tensor):
             self.assertEqual(im.device, normalized.device)
-        assert_allclose(normalized, expected, rtol=1e-3)
+        assert_allclose(normalized, im_type(expected), rtol=1e-3)
 
     @parameterized.expand(TESTS)
     def test_nonzero(self, input_param, input_data, expected_data):
@@ -82,7 +82,7 @@ def test_channel_wise(self, im_type):
         if isinstance(normalized, torch.Tensor):
             self.assertEqual(input_data[key].device, normalized.device)
         expected = np.array([[0.0, -1.0, 0.0, 1.0], [0.0, -1.0, 0.0, 1.0]])
-        assert_allclose(normalized, expected)
+        assert_allclose(normalized, im_type(expected))
 
 
 if __name__ == "__main__":
diff --git a/tests/test_openslide_reader.py b/tests/test_openslide_reader.py
index c0b395fd02..efbd611579 100644
--- a/tests/test_openslide_reader.py
+++ b/tests/test_openslide_reader.py
@@ -24,7 +24,7 @@
 _, has_osl = optional_import("openslide")
 
 
-FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff"
+FILE_URL = "https://drive.google.com/uc?id=1sGTKZlJBIz53pfqTxoTqiIQzIoEzHLAe"
 FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + os.path.basename(FILE_URL))
 
 HEIGHT = 32914
diff --git a/tests/test_pad_collation.py b/tests/test_pad_collation.py
index a8c544558f..eda36f4761 100644
--- a/tests/test_pad_collation.py
+++ b/tests/test_pad_collation.py
@@ -20,6 +20,7 @@
 from monai.data import CacheDataset, DataLoader
 from monai.data.utils import decollate_batch, pad_list_data_collate
 from monai.transforms import (
+    Compose,
     PadListDataCollate,
     RandRotate,
     RandRotate90,
@@ -29,24 +30,26 @@
     RandSpatialCropd,
     RandZoom,
     RandZoomd,
+    ToTensor,
+    ToTensord,
 )
 from monai.utils import set_determinism
 
 TESTS: List[Tuple] = []
 
 for pad_collate in [
-    lambda x: pad_list_data_collate(batch=x, method="end", mode="constant", constant_values=1),
-    PadListDataCollate(method="end", mode="constant", constant_values=1),
+    lambda x: pad_list_data_collate(batch=x, method="end", mode="constant"),
+    PadListDataCollate(method="end", mode="constant"),
 ]:
     TESTS.append((dict, pad_collate, RandSpatialCropd("image", roi_size=[8, 7], random_size=True)))
     TESTS.append((dict, pad_collate, RandRotated("image", prob=1, range_x=np.pi, keep_size=False)))
     TESTS.append((dict, pad_collate, RandZoomd("image", prob=1, min_zoom=1.1, max_zoom=2.0, keep_size=False)))
-    TESTS.append((dict, pad_collate, RandRotate90d("image", prob=1, max_k=2)))
+    TESTS.append((dict, pad_collate, Compose([RandRotate90d("image", prob=1, max_k=2), ToTensord("image")])))
 
     TESTS.append((list, pad_collate, RandSpatialCrop(roi_size=[8, 7], random_size=True)))
     TESTS.append((list, pad_collate, RandRotate(prob=1, range_x=np.pi, keep_size=False)))
     TESTS.append((list, pad_collate, RandZoom(prob=1, min_zoom=1.1, max_zoom=2.0, keep_size=False)))
-    TESTS.append((list, pad_collate, RandRotate90(prob=1, max_k=2)))
+    TESTS.append((list, pad_collate, Compose([RandRotate90(prob=1, max_k=2), ToTensor()])))
 
 
 class _Dataset(torch.utils.data.Dataset):
diff --git a/tests/test_patch_wsi_dataset.py b/tests/test_patch_wsi_dataset.py
index f775f28376..50b0e13859 100644
--- a/tests/test_patch_wsi_dataset.py
+++ b/tests/test_patch_wsi_dataset.py
@@ -21,10 +21,11 @@
 from monai.apps.utils import download_url
 from monai.utils import optional_import
 
-_, has_cim = optional_import("cucim")
+_cucim, has_cim = optional_import("cucim")
+has_cim = has_cim and hasattr(_cucim, "CuImage")
 _, has_osl = optional_import("openslide")
 
-FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff"
+FILE_URL = "https://drive.google.com/uc?id=1sGTKZlJBIz53pfqTxoTqiIQzIoEzHLAe"
 FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + os.path.basename(FILE_URL))
 
 TEST_CASE_0 = [
diff --git a/tests/test_rand_adjust_contrast.py b/tests/test_rand_adjust_contrast.py
index d7d750957d..db408dda42 100644
--- a/tests/test_rand_adjust_contrast.py
+++ b/tests/test_rand_adjust_contrast.py
@@ -15,7 +15,7 @@
 from parameterized import parameterized
 
 from monai.transforms import RandAdjustContrast
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 TEST_CASE_1 = [(0.5, 4.5)]
 
@@ -26,14 +26,16 @@ class TestRandAdjustContrast(NumpyImageTestCase2D):
     @parameterized.expand([TEST_CASE_1, TEST_CASE_2])
     def test_correct_results(self, gamma):
         adjuster = RandAdjustContrast(prob=1.0, gamma=gamma)
-        result = adjuster(self.imt)
-        epsilon = 1e-7
-        img_min = self.imt.min()
-        img_range = self.imt.max() - img_min
-        expected = (
-            np.power(((self.imt - img_min) / float(img_range + epsilon)), adjuster.gamma_value) * img_range + img_min
-        )
-        np.testing.assert_allclose(expected, result, rtol=1e-05)
+        for p in TEST_NDARRAYS:
+            result = adjuster(p(self.imt))
+            epsilon = 1e-7
+            img_min = self.imt.min()
+            img_range = self.imt.max() - img_min
+            expected = (
+                np.power(((self.imt - img_min) / float(img_range + epsilon)), adjuster.gamma_value) * img_range
+                + img_min
+            )
+            assert_allclose(expected, result, rtol=1e-05, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_adjust_contrastd.py b/tests/test_rand_adjust_contrastd.py
index e4b61293bb..026828a9a3 100644
--- a/tests/test_rand_adjust_contrastd.py
+++ b/tests/test_rand_adjust_contrastd.py
@@ -15,7 +15,7 @@
 from parameterized import parameterized
 
 from monai.transforms import RandAdjustContrastd
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 TEST_CASE_1 = [(0.5, 4.5)]
 
@@ -26,14 +26,16 @@ class TestRandAdjustContrastd(NumpyImageTestCase2D):
     @parameterized.expand([TEST_CASE_1, TEST_CASE_2])
     def test_correct_results(self, gamma):
         adjuster = RandAdjustContrastd("img", prob=1.0, gamma=gamma)
-        result = adjuster({"img": self.imt})
-        epsilon = 1e-7
-        img_min = self.imt.min()
-        img_range = self.imt.max() - img_min
-        expected = (
-            np.power(((self.imt - img_min) / float(img_range + epsilon)), adjuster.gamma_value) * img_range + img_min
-        )
-        np.testing.assert_allclose(expected, result["img"], rtol=1e-05)
+        for p in TEST_NDARRAYS:
+            result = adjuster({"img": p(self.imt)})
+            epsilon = 1e-7
+            img_min = self.imt.min()
+            img_range = self.imt.max() - img_min
+            expected = (
+                np.power(((self.imt - img_min) / float(img_range + epsilon)), adjuster.gamma_value) * img_range
+                + img_min
+            )
+            assert_allclose(expected, result["img"], rtol=1e-05, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_affine.py b/tests/test_rand_affine.py
index 1e1a23bc09..96322813c9 100644
--- a/tests/test_rand_affine.py
+++ b/tests/test_rand_affine.py
@@ -16,114 +16,134 @@
 from parameterized import parameterized
 
 from monai.transforms import RandAffine
+from monai.utils.type_conversion import convert_data_type
+from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env
 
-TEST_CASES = [
-    [
-        dict(as_tensor_output=False, device=None),
-        {"img": torch.arange(27).reshape((3, 3, 3))},
-        np.arange(27).reshape((3, 3, 3)),
-    ],
-    [
-        dict(as_tensor_output=False, device=None, spatial_size=-1),
-        {"img": torch.arange(27).reshape((3, 3, 3))},
-        np.arange(27).reshape((3, 3, 3)),
-    ],
-    [
-        dict(as_tensor_output=False, device=None),
-        {"img": torch.arange(27).reshape((3, 3, 3)), "spatial_size": (2, 2)},
-        np.array([[[2.0, 3.0], [5.0, 6.0]], [[11.0, 12.0], [14.0, 15.0]], [[20.0, 21.0], [23.0, 24.0]]]),
-    ],
-    [
-        dict(as_tensor_output=True, device=None),
-        {"img": torch.ones((1, 3, 3, 3)), "spatial_size": (2, 2, 2)},
-        torch.ones((1, 2, 2, 2)),
-    ],
-    [
-        dict(as_tensor_output=True, device=None, spatial_size=(2, 2, 2), cache_grid=True),
-        {"img": torch.ones((1, 3, 3, 3))},
-        torch.ones((1, 2, 2, 2)),
-    ],
-    [
-        dict(
-            prob=0.9,
-            rotate_range=(np.pi / 2,),
-            shear_range=[1, 2],
-            translate_range=[2, 1],
-            as_tensor_output=True,
-            padding_mode="zeros",
-            spatial_size=(2, 2, 2),
-            device=None,
-        ),
-        {"img": torch.ones((1, 3, 3, 3)), "mode": "bilinear"},
-        torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]]),
-    ],
-    [
-        dict(
-            prob=0.9,
-            rotate_range=(np.pi / 2,),
-            shear_range=[1, 2],
-            translate_range=[2, 1],
-            as_tensor_output=True,
-            padding_mode="zeros",
-            spatial_size=(2, 2, 2),
-            cache_grid=True,
-            device=None,
-        ),
-        {"img": torch.ones((1, 3, 3, 3)), "mode": "bilinear"},
-        torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]]),
-    ],
-    [
-        dict(
-            prob=0.9,
-            rotate_range=(np.pi / 2,),
-            shear_range=[1, 2],
-            translate_range=[2, 1],
-            scale_range=[0.1, 0.2],
-            as_tensor_output=True,
-            device=None,
-        ),
-        {"img": torch.arange(64).reshape((1, 8, 8)), "spatial_size": (3, 3)},
-        torch.tensor([[[18.7362, 15.5820, 12.4278], [27.3988, 24.2446, 21.0904], [36.0614, 32.9072, 29.7530]]]),
-    ],
-    [
-        dict(
-            prob=0.9,
-            rotate_range=(np.pi / 2,),
-            shear_range=[1, 2],
-            translate_range=[2, 1],
-            scale_range=[0.1, 0.2],
-            spatial_size=(3, 3),
-            cache_grid=True,
-            as_tensor_output=True,
-            device=None,
-        ),
-        {"img": torch.arange(64).reshape((1, 8, 8))},
-        torch.tensor([[[18.7362, 15.5820, 12.4278], [27.3988, 24.2446, 21.0904], [36.0614, 32.9072, 29.7530]]]),
-    ],
-]
+_rtol = 1e-3 if is_tf32_env() else 1e-4
+
+TESTS = []
+for p in TEST_NDARRAYS:
+    for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]:
+        TESTS.append(
+            [
+                dict(device=device),
+                {"img": p(torch.arange(27).reshape((3, 3, 3)))},
+                p(np.arange(27).reshape((3, 3, 3))),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(device=device, spatial_size=-1),
+                {"img": p(torch.arange(27).reshape((3, 3, 3)))},
+                p(np.arange(27).reshape((3, 3, 3))),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(device=device),
+                {"img": p(torch.arange(27).reshape((3, 3, 3))), "spatial_size": (2, 2)},
+                p(np.array([[[2.0, 3.0], [5.0, 6.0]], [[11.0, 12.0], [14.0, 15.0]], [[20.0, 21.0], [23.0, 24.0]]])),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(device=device),
+                {"img": p(torch.ones((1, 3, 3, 3))), "spatial_size": (2, 2, 2)},
+                p(torch.ones((1, 2, 2, 2))),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(device=device, spatial_size=(2, 2, 2), cache_grid=True),
+                {"img": p(torch.ones((1, 3, 3, 3)))},
+                p(torch.ones((1, 2, 2, 2))),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(
+                    prob=0.9,
+                    rotate_range=(np.pi / 2,),
+                    shear_range=[1, 2],
+                    translate_range=[2, 1],
+                    padding_mode="zeros",
+                    spatial_size=(2, 2, 2),
+                    device=device,
+                ),
+                {"img": p(torch.ones((1, 3, 3, 3))), "mode": "bilinear"},
+                p(torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]])),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(
+                    prob=0.9,
+                    rotate_range=(np.pi / 2,),
+                    shear_range=[1, 2],
+                    translate_range=[2, 1],
+                    padding_mode="zeros",
+                    spatial_size=(2, 2, 2),
+                    cache_grid=True,
+                    device=device,
+                ),
+                {"img": p(torch.ones((1, 3, 3, 3))), "mode": "bilinear"},
+                p(torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]])),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(
+                    prob=0.9,
+                    rotate_range=(np.pi / 2,),
+                    shear_range=[1, 2],
+                    translate_range=[2, 1],
+                    scale_range=[0.1, 0.2],
+                    device=device,
+                ),
+                {"img": p(torch.arange(64).reshape((1, 8, 8))), "spatial_size": (3, 3)},
+                p(
+                    torch.tensor(
+                        [[[18.7362, 15.5820, 12.4278], [27.3988, 24.2446, 21.0904], [36.0614, 32.9072, 29.7530]]]
+                    )
+                ),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(
+                    prob=0.9,
+                    rotate_range=(np.pi / 2,),
+                    shear_range=[1, 2],
+                    translate_range=[2, 1],
+                    scale_range=[0.1, 0.2],
+                    spatial_size=(3, 3),
+                    cache_grid=True,
+                    device=device,
+                ),
+                {"img": p(torch.arange(64).reshape((1, 8, 8)))},
+                p(
+                    torch.tensor(
+                        [[[18.7362, 15.5820, 12.4278], [27.3988, 24.2446, 21.0904], [36.0614, 32.9072, 29.7530]]]
+                    )
+                ),
+            ]
+        )
 
-ARR_NUMPY = np.arange(9 * 10).reshape(1, 9, 10)
-ARR_TORCH = torch.Tensor(ARR_NUMPY)
 TEST_CASES_SKIPPED_CONSISTENCY = []
-for im in (ARR_NUMPY, ARR_TORCH):
-    for as_tensor_output in (True, False):
-        for in_dtype_is_int in (True, False):
-            TEST_CASES_SKIPPED_CONSISTENCY.append((im, as_tensor_output, in_dtype_is_int))
+for p in TEST_NDARRAYS:
+    for in_dtype in (np.int32, np.float32):
+        TEST_CASES_SKIPPED_CONSISTENCY.append((p(np.arange(9 * 10).reshape(1, 9, 10)), in_dtype))
 
 
 class TestRandAffine(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_rand_affine(self, input_param, input_data, expected_val):
         g = RandAffine(**input_param)
         g.set_random_state(123)
         result = g(**input_data)
         if input_param.get("cache_grid", False):
             self.assertTrue(g._cached_grid is not None)
-        self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor))
-        if isinstance(result, torch.Tensor):
-            np.testing.assert_allclose(result.cpu().numpy(), expected_val.cpu().numpy(), rtol=1e-4, atol=1e-4)
-        else:
-            np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4)
+        assert_allclose(result, expected_val, rtol=_rtol, atol=1e-4)
 
     def test_ill_cache(self):
         with self.assertWarns(UserWarning):
@@ -132,15 +152,11 @@ def test_ill_cache(self):
             RandAffine(cache_grid=True, spatial_size=(1, 1, -1))
 
     @parameterized.expand(TEST_CASES_SKIPPED_CONSISTENCY)
-    def test_skipped_transform_consistency(self, im, as_tensor_output, in_dtype_is_int):
-        t1 = RandAffine(prob=0, as_tensor_output=as_tensor_output)
-        t2 = RandAffine(prob=1, spatial_size=(10, 11), as_tensor_output=as_tensor_output)
+    def test_skipped_transform_consistency(self, im, in_dtype):
+        t1 = RandAffine(prob=0)
+        t2 = RandAffine(prob=1, spatial_size=(10, 11))
 
-        # change dtype to int32 or float32
-        if in_dtype_is_int:
-            im = im.astype("int32") if isinstance(im, np.ndarray) else im.int()
-        else:
-            im = im.astype("float32") if isinstance(im, np.ndarray) else im.float()
+        im, *_ = convert_data_type(im, dtype=in_dtype)
 
         out1 = t1(im)
         out2 = t2(im)
diff --git a/tests/test_rand_affine_grid.py b/tests/test_rand_affine_grid.py
index 605d0a30ba..ade615cd65 100644
--- a/tests/test_rand_affine_grid.py
+++ b/tests/test_rand_affine_grid.py
@@ -16,182 +16,194 @@
 from parameterized import parameterized
 
 from monai.transforms import RandAffineGrid
+from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env
 
-TEST_CASES = [
-    [{"as_tensor_output": False, "device": None}, {"grid": torch.ones((3, 3, 3))}, np.ones((3, 3, 3))],
-    [
-        {"rotate_range": (1, 2), "translate_range": (3, 3, 3)},
-        {"grid": torch.arange(0, 27).reshape((3, 3, 3))},
-        torch.tensor(
-            np.array(
-                [
-                    [
-                        [-32.81998, -33.910976, -35.001972],
-                        [-36.092968, -37.183964, -38.27496],
-                        [-39.36596, -40.456955, -41.54795],
-                    ],
-                    [[2.1380205, 3.1015975, 4.0651755], [5.028752, 5.9923296, 6.955907], [7.919484, 8.883063, 9.84664]],
-                    [[18.0, 19.0, 20.0], [21.0, 22.0, 23.0], [24.0, 25.0, 26.0]],
-                ]
-            )
-        ),
-    ],
-    [
-        {"translate_range": (3, 3, 3), "as_tensor_output": False, "device": torch.device("cpu:0")},
-        {"spatial_size": (3, 3, 3)},
-        np.array(
+_rtol = 1e-1 if is_tf32_env else 1e-4
+
+TESTS = []
+for p in TEST_NDARRAYS:
+    for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]:
+        TESTS.append([{"device": device}, {"grid": p(torch.ones((3, 3, 3)))}, p(np.ones((3, 3, 3)))])
+        TESTS.append(
             [
-                [
-                    [
-                        [0.17881513, 0.17881513, 0.17881513],
-                        [0.17881513, 0.17881513, 0.17881513],
-                        [0.17881513, 0.17881513, 0.17881513],
-                    ],
-                    [
-                        [1.1788151, 1.1788151, 1.1788151],
-                        [1.1788151, 1.1788151, 1.1788151],
-                        [1.1788151, 1.1788151, 1.1788151],
-                    ],
-                    [
-                        [2.1788151, 2.1788151, 2.1788151],
-                        [2.1788151, 2.1788151, 2.1788151],
-                        [2.1788151, 2.1788151, 2.1788151],
-                    ],
-                ],
-                [
-                    [
-                        [-2.283164, -2.283164, -2.283164],
-                        [-1.283164, -1.283164, -1.283164],
-                        [-0.28316402, -0.28316402, -0.28316402],
-                    ],
-                    [
-                        [-2.283164, -2.283164, -2.283164],
-                        [-1.283164, -1.283164, -1.283164],
-                        [-0.28316402, -0.28316402, -0.28316402],
-                    ],
-                    [
-                        [-2.283164, -2.283164, -2.283164],
-                        [-1.283164, -1.283164, -1.283164],
-                        [-0.28316402, -0.28316402, -0.28316402],
-                    ],
-                ],
-                [
-                    [
-                        [-2.6388912, -1.6388912, -0.6388912],
-                        [-2.6388912, -1.6388912, -0.6388912],
-                        [-2.6388912, -1.6388912, -0.6388912],
-                    ],
-                    [
-                        [-2.6388912, -1.6388912, -0.6388912],
-                        [-2.6388912, -1.6388912, -0.6388912],
-                        [-2.6388912, -1.6388912, -0.6388912],
-                    ],
-                    [
-                        [-2.6388912, -1.6388912, -0.6388912],
-                        [-2.6388912, -1.6388912, -0.6388912],
-                        [-2.6388912, -1.6388912, -0.6388912],
-                    ],
-                ],
-                [
-                    [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
-                    [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
-                    [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
-                ],
-            ]
-        ),
-    ],
-    [
-        {"rotate_range": (1.0, 1.0, 1.0), "shear_range": (0.1,), "scale_range": (1.2,)},
-        {"grid": torch.arange(0, 108).reshape((4, 3, 3, 3))},
-        torch.tensor(
-            np.array(
-                [
-                    [
-                        [
-                            [-9.4201e00, -8.1672e00, -6.9143e00],
-                            [-5.6614e00, -4.4085e00, -3.1556e00],
-                            [-1.9027e00, -6.4980e-01, 6.0310e-01],
-                        ],
-                        [
-                            [1.8560e00, 3.1089e00, 4.3618e00],
-                            [5.6147e00, 6.8676e00, 8.1205e00],
-                            [9.3734e00, 1.0626e01, 1.1879e01],
-                        ],
+                {"rotate_range": (1, 2), "translate_range": (3, 3, 3)},
+                {"grid": p(torch.arange(0, 27).reshape((3, 3, 3)))},
+                p(
+                    np.array(
                         [
-                            [1.3132e01, 1.4385e01, 1.5638e01],
-                            [1.6891e01, 1.8144e01, 1.9397e01],
-                            [2.0650e01, 2.1902e01, 2.3155e01],
-                        ],
-                    ],
-                    [
-                        [
-                            [9.9383e-02, -4.8845e-01, -1.0763e00],
-                            [-1.6641e00, -2.2519e00, -2.8398e00],
-                            [-3.4276e00, -4.0154e00, -4.6032e00],
-                        ],
-                        [
-                            [-5.1911e00, -5.7789e00, -6.3667e00],
-                            [-6.9546e00, -7.5424e00, -8.1302e00],
-                            [-8.7180e00, -9.3059e00, -9.8937e00],
-                        ],
-                        [
-                            [-1.0482e01, -1.1069e01, -1.1657e01],
-                            [-1.2245e01, -1.2833e01, -1.3421e01],
-                            [-1.4009e01, -1.4596e01, -1.5184e01],
-                        ],
-                    ],
+                            [
+                                [-32.81998, -33.910976, -35.001972],
+                                [-36.092968, -37.183964, -38.27496],
+                                [-39.36596, -40.456955, -41.54795],
+                            ],
+                            [
+                                [2.1380205, 3.1015975, 4.0651755],
+                                [5.028752, 5.9923296, 6.955907],
+                                [7.919484, 8.883063, 9.84664],
+                            ],
+                            [[18.0, 19.0, 20.0], [21.0, 22.0, 23.0], [24.0, 25.0, 26.0]],
+                        ]
+                    )
+                ),
+            ]
+        )
+        TESTS.append(
+            [
+                {"translate_range": (3, 3, 3), "device": device},
+                {"spatial_size": (3, 3, 3)},
+                np.array(
                     [
                         [
-                            [5.9635e01, 6.1199e01, 6.2764e01],
-                            [6.4328e01, 6.5892e01, 6.7456e01],
-                            [6.9021e01, 7.0585e01, 7.2149e01],
-                        ],
-                        [
-                            [7.3714e01, 7.5278e01, 7.6842e01],
-                            [7.8407e01, 7.9971e01, 8.1535e01],
-                            [8.3099e01, 8.4664e01, 8.6228e01],
+                            [
+                                [0.17881513, 0.17881513, 0.17881513],
+                                [0.17881513, 0.17881513, 0.17881513],
+                                [0.17881513, 0.17881513, 0.17881513],
+                            ],
+                            [
+                                [1.1788151, 1.1788151, 1.1788151],
+                                [1.1788151, 1.1788151, 1.1788151],
+                                [1.1788151, 1.1788151, 1.1788151],
+                            ],
+                            [
+                                [2.1788151, 2.1788151, 2.1788151],
+                                [2.1788151, 2.1788151, 2.1788151],
+                                [2.1788151, 2.1788151, 2.1788151],
+                            ],
                         ],
                         [
-                            [8.7792e01, 8.9357e01, 9.0921e01],
-                            [9.2485e01, 9.4049e01, 9.5614e01],
-                            [9.7178e01, 9.8742e01, 1.0031e02],
+                            [
+                                [-2.283164, -2.283164, -2.283164],
+                                [-1.283164, -1.283164, -1.283164],
+                                [-0.28316402, -0.28316402, -0.28316402],
+                            ],
+                            [
+                                [-2.283164, -2.283164, -2.283164],
+                                [-1.283164, -1.283164, -1.283164],
+                                [-0.28316402, -0.28316402, -0.28316402],
+                            ],
+                            [
+                                [-2.283164, -2.283164, -2.283164],
+                                [-1.283164, -1.283164, -1.283164],
+                                [-0.28316402, -0.28316402, -0.28316402],
+                            ],
                         ],
-                    ],
-                    [
                         [
-                            [8.1000e01, 8.2000e01, 8.3000e01],
-                            [8.4000e01, 8.5000e01, 8.6000e01],
-                            [8.7000e01, 8.8000e01, 8.9000e01],
+                            [
+                                [-2.6388912, -1.6388912, -0.6388912],
+                                [-2.6388912, -1.6388912, -0.6388912],
+                                [-2.6388912, -1.6388912, -0.6388912],
+                            ],
+                            [
+                                [-2.6388912, -1.6388912, -0.6388912],
+                                [-2.6388912, -1.6388912, -0.6388912],
+                                [-2.6388912, -1.6388912, -0.6388912],
+                            ],
+                            [
+                                [-2.6388912, -1.6388912, -0.6388912],
+                                [-2.6388912, -1.6388912, -0.6388912],
+                                [-2.6388912, -1.6388912, -0.6388912],
+                            ],
                         ],
                         [
-                            [9.0000e01, 9.1000e01, 9.2000e01],
-                            [9.3000e01, 9.4000e01, 9.5000e01],
-                            [9.6000e01, 9.7000e01, 9.8000e01],
+                            [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
+                            [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
+                            [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]],
                         ],
+                    ]
+                ),
+            ]
+        )
+        TESTS.append(
+            [
+                {"device": device, "rotate_range": (1.0, 1.0, 1.0), "shear_range": (0.1,), "scale_range": (1.2,)},
+                {"grid": p(torch.arange(0, 108).reshape((4, 3, 3, 3)))},
+                p(
+                    np.array(
                         [
-                            [9.9000e01, 1.0000e02, 1.0100e02],
-                            [1.0200e02, 1.0300e02, 1.0400e02],
-                            [1.0500e02, 1.0600e02, 1.0700e02],
-                        ],
-                    ],
-                ]
-            )
-        ),
-    ],
-]
+                            [
+                                [
+                                    [-9.4201e00, -8.1672e00, -6.9143e00],
+                                    [-5.6614e00, -4.4085e00, -3.1556e00],
+                                    [-1.9027e00, -6.4980e-01, 6.0310e-01],
+                                ],
+                                [
+                                    [1.8560e00, 3.1089e00, 4.3618e00],
+                                    [5.6147e00, 6.8676e00, 8.1205e00],
+                                    [9.3734e00, 1.0626e01, 1.1879e01],
+                                ],
+                                [
+                                    [1.3132e01, 1.4385e01, 1.5638e01],
+                                    [1.6891e01, 1.8144e01, 1.9397e01],
+                                    [2.0650e01, 2.1902e01, 2.3155e01],
+                                ],
+                            ],
+                            [
+                                [
+                                    [9.9383e-02, -4.8845e-01, -1.0763e00],
+                                    [-1.6641e00, -2.2519e00, -2.8398e00],
+                                    [-3.4276e00, -4.0154e00, -4.6032e00],
+                                ],
+                                [
+                                    [-5.1911e00, -5.7789e00, -6.3667e00],
+                                    [-6.9546e00, -7.5424e00, -8.1302e00],
+                                    [-8.7180e00, -9.3059e00, -9.8937e00],
+                                ],
+                                [
+                                    [-1.0482e01, -1.1069e01, -1.1657e01],
+                                    [-1.2245e01, -1.2833e01, -1.3421e01],
+                                    [-1.4009e01, -1.4596e01, -1.5184e01],
+                                ],
+                            ],
+                            [
+                                [
+                                    [5.9635e01, 6.1199e01, 6.2764e01],
+                                    [6.4328e01, 6.5892e01, 6.7456e01],
+                                    [6.9021e01, 7.0585e01, 7.2149e01],
+                                ],
+                                [
+                                    [7.3714e01, 7.5278e01, 7.6842e01],
+                                    [7.8407e01, 7.9971e01, 8.1535e01],
+                                    [8.3099e01, 8.4664e01, 8.6228e01],
+                                ],
+                                [
+                                    [8.7792e01, 8.9357e01, 9.0921e01],
+                                    [9.2485e01, 9.4049e01, 9.5614e01],
+                                    [9.7178e01, 9.8742e01, 1.0031e02],
+                                ],
+                            ],
+                            [
+                                [
+                                    [8.1000e01, 8.2000e01, 8.3000e01],
+                                    [8.4000e01, 8.5000e01, 8.6000e01],
+                                    [8.7000e01, 8.8000e01, 8.9000e01],
+                                ],
+                                [
+                                    [9.0000e01, 9.1000e01, 9.2000e01],
+                                    [9.3000e01, 9.4000e01, 9.5000e01],
+                                    [9.6000e01, 9.7000e01, 9.8000e01],
+                                ],
+                                [
+                                    [9.9000e01, 1.0000e02, 1.0100e02],
+                                    [1.0200e02, 1.0300e02, 1.0400e02],
+                                    [1.0500e02, 1.0600e02, 1.0700e02],
+                                ],
+                            ],
+                        ]
+                    )
+                ),
+            ]
+        )
 
 
 class TestRandAffineGrid(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_rand_affine_grid(self, input_param, input_data, expected_val):
         g = RandAffineGrid(**input_param)
         g.set_random_state(123)
         result = g(**input_data)
-        self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor))
-        if isinstance(result, torch.Tensor):
-            np.testing.assert_allclose(result.cpu().numpy(), expected_val.cpu().numpy(), rtol=1e-4, atol=1e-4)
-        else:
-            np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4)
+        if "device" in input_data:
+            self.assertEqual(result.device, input_data[device])
+        assert_allclose(result, expected_val, type_test=False, rtol=_rtol, atol=1e-4)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_affined.py b/tests/test_rand_affined.py
index d2f8a60665..651452ab07 100644
--- a/tests/test_rand_affined.py
+++ b/tests/test_rand_affined.py
@@ -17,179 +17,190 @@
 
 from monai.transforms import RandAffined
 from monai.utils import GridSampleMode
+from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env
 
-TEST_CASES = [
-    [
-        dict(as_tensor_output=False, device=None, spatial_size=None, keys=("img", "seg")),
-        {"img": torch.arange(27).reshape((3, 3, 3)), "seg": torch.arange(27).reshape((3, 3, 3))},
-        np.arange(27).reshape((3, 3, 3)),
-    ],
-    [
-        dict(as_tensor_output=False, device=None, spatial_size=(2, 2), keys=("img", "seg")),
-        {"img": torch.ones((3, 3, 3)), "seg": torch.ones((3, 3, 3))},
-        np.ones((3, 2, 2)),
-    ],
-    [
-        dict(as_tensor_output=False, device=None, spatial_size=(2, 2), cache_grid=True, keys=("img", "seg")),
-        {"img": torch.ones((3, 3, 3)), "seg": torch.ones((3, 3, 3))},
-        np.ones((3, 2, 2)),
-    ],
-    [
-        dict(as_tensor_output=True, device=None, spatial_size=(2, 2, 2), keys=("img", "seg")),
-        {"img": torch.ones((1, 3, 3, 3)), "seg": torch.ones((1, 3, 3, 3))},
-        torch.ones((1, 2, 2, 2)),
-    ],
-    [
-        dict(
-            prob=0.9,
-            rotate_range=(np.pi / 2,),
-            shear_range=[1, 2],
-            translate_range=[2, 1],
-            as_tensor_output=True,
-            spatial_size=(2, 2, 2),
-            padding_mode="zeros",
-            device=None,
-            keys=("img", "seg"),
-            mode="bilinear",
-        ),
-        {"img": torch.ones((1, 3, 3, 3)), "seg": torch.ones((1, 3, 3, 3))},
-        torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]]),
-    ],
-    [
-        dict(
-            prob=0.9,
-            rotate_range=(np.pi / 2,),
-            shear_range=[1, 2],
-            translate_range=[2, 1],
-            as_tensor_output=False,
-            spatial_size=(2, 2, 2),
-            padding_mode="zeros",
-            device=None,
-            cache_grid=True,
-            keys=("img", "seg"),
-            mode="bilinear",
-        ),
-        {"img": torch.ones((1, 3, 3, 3)), "seg": torch.ones((1, 3, 3, 3))},
-        np.array([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]]),
-    ],
-    [
-        dict(
-            prob=0.9,
-            rotate_range=(np.pi / 2,),
-            shear_range=[1, 2],
-            translate_range=[2, 1],
-            scale_range=[0.1, 0.2],
-            as_tensor_output=True,
-            spatial_size=(3, 3),
-            keys=("img", "seg"),
-            device=None,
-        ),
-        {"img": torch.arange(64).reshape((1, 8, 8)), "seg": torch.arange(64).reshape((1, 8, 8))},
-        torch.tensor([[[18.7362, 15.5820, 12.4278], [27.3988, 24.2446, 21.0904], [36.0614, 32.9072, 29.7530]]]),
-    ],
-    [
-        dict(
-            prob=0.9,
-            mode=("bilinear", "nearest"),
-            rotate_range=(np.pi / 2,),
-            shear_range=[1, 2],
-            translate_range=[2, 1],
-            scale_range=[0.1, 0.2],
-            as_tensor_output=False,
-            spatial_size=(3, 3),
-            keys=("img", "seg"),
-            device=torch.device("cpu:0"),
-        ),
-        {"img": torch.arange(64).reshape((1, 8, 8)), "seg": torch.arange(64).reshape((1, 8, 8))},
-        {
-            "img": np.array(
-                [
-                    [
-                        [18.736153, 15.581954, 12.4277525],
-                        [27.398798, 24.244598, 21.090399],
-                        [36.061443, 32.90724, 29.753046],
-                    ]
-                ]
-            ),
-            "seg": np.array([[[19.0, 20.0, 12.0], [27.0, 28.0, 20.0], [35.0, 36.0, 29.0]]]),
-        },
-    ],
-    [
-        dict(
-            prob=0.9,
-            rotate_range=(np.pi / 2,),
-            shear_range=[1, 2],
-            translate_range=[2, 1],
-            as_tensor_output=True,
-            spatial_size=(2, 2, 2),
-            padding_mode="zeros",
-            device=None,
-            keys=("img", "seg"),
-            mode=GridSampleMode.BILINEAR,
-        ),
-        {"img": torch.ones((1, 3, 3, 3)), "seg": torch.ones((1, 3, 3, 3))},
-        torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]]),
-    ],
-    [
-        dict(
-            prob=0.9,
-            mode=(GridSampleMode.BILINEAR, GridSampleMode.NEAREST),
-            rotate_range=(np.pi / 2,),
-            shear_range=[1, 2],
-            translate_range=[2, 1],
-            scale_range=[0.1, 0.2],
-            as_tensor_output=False,
-            spatial_size=(3, 3),
-            keys=("img", "seg"),
-            device=torch.device("cpu:0"),
-        ),
-        {"img": torch.arange(64).reshape((1, 8, 8)), "seg": torch.arange(64).reshape((1, 8, 8))},
-        {
-            "img": np.array(
-                [
-                    [
-                        [18.736153, 15.581954, 12.4277525],
-                        [27.398798, 24.244598, 21.090399],
-                        [36.061443, 32.90724, 29.753046],
-                    ]
-                ]
-            ),
-            "seg": np.array([[[19.0, 20.0, 12.0], [27.0, 28.0, 20.0], [35.0, 36.0, 29.0]]]),
-        },
-    ],
-    [
-        dict(
-            prob=0.9,
-            mode=(GridSampleMode.BILINEAR, GridSampleMode.NEAREST),
-            rotate_range=(np.pi / 2,),
-            shear_range=[1, 2],
-            translate_range=[2, 1],
-            scale_range=[0.1, 0.2],
-            as_tensor_output=False,
-            spatial_size=(3, 3),
-            cache_grid=True,
-            keys=("img", "seg"),
-            device=torch.device("cpu:0"),
-        ),
-        {"img": torch.arange(64).reshape((1, 8, 8)), "seg": torch.arange(64).reshape((1, 8, 8))},
-        {
-            "img": np.array(
-                [
-                    [
-                        [18.736153, 15.581954, 12.4277525],
-                        [27.398798, 24.244598, 21.090399],
-                        [36.061443, 32.90724, 29.753046],
-                    ]
-                ]
-            ),
-            "seg": np.array([[[19.0, 20.0, 12.0], [27.0, 28.0, 20.0], [35.0, 36.0, 29.0]]]),
-        },
-    ],
-]
+_rtol = 1e-3 if is_tf32_env() else 1e-4
+
+TESTS = []
+for p in TEST_NDARRAYS:
+    for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]:
+        TESTS.append(
+            [
+                dict(device=device, spatial_size=None, keys=("img", "seg")),
+                {"img": p(torch.arange(27).reshape((3, 3, 3))), "seg": p(torch.arange(27).reshape((3, 3, 3)))},
+                p(np.arange(27).reshape((3, 3, 3))),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(device=device, spatial_size=(2, 2), keys=("img", "seg")),
+                {"img": p(torch.ones((3, 3, 3))), "seg": p(torch.ones((3, 3, 3)))},
+                p(np.ones((3, 2, 2))),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(device=device, spatial_size=(2, 2), cache_grid=True, keys=("img", "seg")),
+                {"img": p(torch.ones((3, 3, 3))), "seg": p(torch.ones((3, 3, 3)))},
+                p(np.ones((3, 2, 2))),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(device=device, spatial_size=(2, 2, 2), keys=("img", "seg")),
+                {"img": p(torch.ones((1, 3, 3, 3))), "seg": p(torch.ones((1, 3, 3, 3)))},
+                p(torch.ones((1, 2, 2, 2))),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(
+                    prob=0.9,
+                    rotate_range=(np.pi / 2,),
+                    shear_range=[1, 2],
+                    translate_range=[2, 1],
+                    spatial_size=(2, 2, 2),
+                    padding_mode="zeros",
+                    device=device,
+                    keys=("img", "seg"),
+                    mode="bilinear",
+                ),
+                {"img": p(torch.ones((1, 3, 3, 3))), "seg": p(torch.ones((1, 3, 3, 3)))},
+                p(torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]])),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(
+                    prob=0.9,
+                    rotate_range=(np.pi / 2,),
+                    shear_range=[1, 2],
+                    translate_range=[2, 1],
+                    scale_range=[0.1, 0.2],
+                    spatial_size=(3, 3),
+                    keys=("img", "seg"),
+                    device=device,
+                ),
+                {"img": p(torch.arange(64).reshape((1, 8, 8))), "seg": p(torch.arange(64).reshape((1, 8, 8)))},
+                p(
+                    torch.tensor(
+                        [[[18.7362, 15.5820, 12.4278], [27.3988, 24.2446, 21.0904], [36.0614, 32.9072, 29.7530]]]
+                    )
+                ),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(
+                    prob=0.9,
+                    mode=("bilinear", "nearest"),
+                    rotate_range=(np.pi / 2,),
+                    shear_range=[1, 2],
+                    translate_range=[2, 1],
+                    scale_range=[0.1, 0.2],
+                    spatial_size=(3, 3),
+                    keys=("img", "seg"),
+                    device=device,
+                ),
+                {"img": p(torch.arange(64).reshape((1, 8, 8))), "seg": p(torch.arange(64).reshape((1, 8, 8)))},
+                {
+                    "img": p(
+                        np.array(
+                            [
+                                [
+                                    [18.736153, 15.581954, 12.4277525],
+                                    [27.398798, 24.244598, 21.090399],
+                                    [36.061443, 32.90724, 29.753046],
+                                ]
+                            ]
+                        )
+                    ),
+                    "seg": p(np.array([[[19.0, 20.0, 12.0], [27.0, 28.0, 20.0], [35.0, 36.0, 29.0]]])),
+                },
+            ]
+        )
+        TESTS.append(
+            [
+                dict(
+                    prob=0.9,
+                    rotate_range=(np.pi / 2,),
+                    shear_range=[1, 2],
+                    translate_range=[2, 1],
+                    spatial_size=(2, 2, 2),
+                    padding_mode="zeros",
+                    device=device,
+                    keys=("img", "seg"),
+                    mode=GridSampleMode.BILINEAR,
+                ),
+                {"img": p(torch.ones((1, 3, 3, 3))), "seg": p(torch.ones((1, 3, 3, 3)))},
+                p(torch.tensor([[[[0.3658, 1.0000], [1.0000, 1.0000]], [[1.0000, 1.0000], [1.0000, 0.9333]]]])),
+            ]
+        )
+        TESTS.append(
+            [
+                dict(
+                    prob=0.9,
+                    mode=(GridSampleMode.BILINEAR, GridSampleMode.NEAREST),
+                    rotate_range=(np.pi / 2,),
+                    shear_range=[1, 2],
+                    translate_range=[2, 1],
+                    scale_range=[0.1, 0.2],
+                    spatial_size=(3, 3),
+                    keys=("img", "seg"),
+                    device=device,
+                ),
+                {"img": p(torch.arange(64).reshape((1, 8, 8))), "seg": p(torch.arange(64).reshape((1, 8, 8)))},
+                {
+                    "img": p(
+                        np.array(
+                            [
+                                [
+                                    [18.736153, 15.581954, 12.4277525],
+                                    [27.398798, 24.244598, 21.090399],
+                                    [36.061443, 32.90724, 29.753046],
+                                ]
+                            ]
+                        )
+                    ),
+                    "seg": p(np.array([[[19.0, 20.0, 12.0], [27.0, 28.0, 20.0], [35.0, 36.0, 29.0]]])),
+                },
+            ]
+        )
+        TESTS.append(
+            [
+                dict(
+                    prob=0.9,
+                    mode=(GridSampleMode.BILINEAR, GridSampleMode.NEAREST),
+                    rotate_range=(np.pi / 2,),
+                    shear_range=[1, 2],
+                    translate_range=[2, 1],
+                    scale_range=[0.1, 0.2],
+                    spatial_size=(3, 3),
+                    cache_grid=True,
+                    keys=("img", "seg"),
+                    device=device,
+                ),
+                {"img": p(torch.arange(64).reshape((1, 8, 8))), "seg": p(torch.arange(64).reshape((1, 8, 8)))},
+                {
+                    "img": p(
+                        np.array(
+                            [
+                                [
+                                    [18.736153, 15.581954, 12.4277525],
+                                    [27.398798, 24.244598, 21.090399],
+                                    [36.061443, 32.90724, 29.753046],
+                                ]
+                            ]
+                        )
+                    ),
+                    "seg": p(np.array([[[19.0, 20.0, 12.0], [27.0, 28.0, 20.0], [35.0, 36.0, 29.0]]])),
+                },
+            ]
+        )
 
 
 class TestRandAffined(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_rand_affined(self, input_param, input_data, expected_val):
         g = RandAffined(**input_param).set_random_state(123)
         res = g(input_data)
@@ -200,23 +211,21 @@ def test_rand_affined(self, input_param, input_data, expected_val):
             if "_transforms" in key:
                 continue
             expected = expected_val[key] if isinstance(expected_val, dict) else expected_val
-            self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected, torch.Tensor))
-            if isinstance(result, torch.Tensor):
-                np.testing.assert_allclose(result.cpu().numpy(), expected.cpu().numpy(), rtol=1e-4, atol=1e-4)
-            else:
-                np.testing.assert_allclose(result, expected, rtol=1e-4, atol=1e-4)
+            assert_allclose(result, expected, rtol=_rtol, atol=1e-3)
+
+        g.set_random_state(4)
+        res = g(input_data)
+        # affine should be tensor because the resampler only supports pytorch backend
+        self.assertTrue(isinstance(res["img_transforms"][0]["extra_info"]["affine"], torch.Tensor))
 
     def test_ill_cache(self):
         with self.assertWarns(UserWarning):
             # spatial size is None
-            RandAffined(
-                as_tensor_output=False, device=None, spatial_size=None, prob=1.0, cache_grid=True, keys=("img", "seg")
-            )
+            RandAffined(device=device, spatial_size=None, prob=1.0, cache_grid=True, keys=("img", "seg"))
         with self.assertWarns(UserWarning):
             # spatial size is dynamic
             RandAffined(
-                as_tensor_output=False,
-                device=None,
+                device=device,
                 spatial_size=(2, -1),
                 prob=1.0,
                 cache_grid=True,
diff --git a/tests/test_rand_axis_flip.py b/tests/test_rand_axis_flip.py
index c05c3a1e0d..1772ef4987 100644
--- a/tests/test_rand_axis_flip.py
+++ b/tests/test_rand_axis_flip.py
@@ -22,10 +22,8 @@ def test_correct_results(self):
         for p in TEST_NDARRAYS:
             flip = RandAxisFlip(prob=1.0)
             result = flip(p(self.imt[0]))
-            expected = []
-            for channel in self.imt[0]:
-                expected.append(np.flip(channel, flip._axis))
-            assert_allclose(np.stack(expected), result)
+            expected = [np.flip(channel, flip._axis) for channel in self.imt[0]]
+            assert_allclose(result, p(np.stack(expected)))
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_axis_flipd.py b/tests/test_rand_axis_flipd.py
index 7bef0baa63..37a17db69f 100644
--- a/tests/test_rand_axis_flipd.py
+++ b/tests/test_rand_axis_flipd.py
@@ -23,10 +23,8 @@ def test_correct_results(self):
             flip = RandAxisFlipd(keys="img", prob=1.0)
             result = flip({"img": p(self.imt[0])})["img"]
 
-            expected = []
-            for channel in self.imt[0]:
-                expected.append(np.flip(channel, flip._axis))
-            assert_allclose(np.stack(expected), result)
+            expected = [np.flip(channel, flip._axis) for channel in self.imt[0]]
+            assert_allclose(result, p(np.stack(expected)))
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_local_patch_shuffle.py b/tests/test_rand_coarse_shuffle.py
similarity index 55%
rename from tests/test_rand_local_patch_shuffle.py
rename to tests/test_rand_coarse_shuffle.py
index 8e2eefb5d1..0b8cdc6cf8 100644
--- a/tests/test_rand_local_patch_shuffle.py
+++ b/tests/test_rand_coarse_shuffle.py
@@ -14,32 +14,39 @@
 import numpy as np
 from parameterized import parameterized
 
-from monai.transforms import LocalPatchShuffling
+from monai.transforms import RandCoarseShuffle
 
 TEST_CASES = [
     [
-        {"number_blocks": 10, "blocksize_ratio": 1, "prob": 0.0},
+        {"holes": 5, "spatial_size": 1, "max_spatial_size": -1, "prob": 0.0},
         {"img": np.arange(8).reshape((1, 2, 2, 2))},
         np.arange(8).reshape((1, 2, 2, 2)),
     ],
     [
-        {"number_blocks": 10, "blocksize_ratio": 1, "prob": 1.0},
+        {"holes": 10, "spatial_size": 1, "max_spatial_size": -1, "prob": 1.0},
         {"img": np.arange(27).reshape((1, 3, 3, 3))},
-        [
+        np.asarray(
             [
-                [[9, 1, 2], [3, 4, 5], [6, 7, 8]],
-                [[0, 10, 11], [12, 4, 14], [15, 16, 17]],
-                [[18, 19, 20], [21, 22, 23], [24, 25, 26]],
-            ]
-        ],
+                [
+                    [[8, 19, 26], [24, 6, 15], [0, 13, 25]],
+                    [[17, 3, 5], [10, 1, 12], [22, 4, 11]],
+                    [[21, 20, 23], [14, 2, 16], [18, 9, 7]],
+                ],
+            ],
+        ),
+    ],
+    [
+        {"holes": 2, "spatial_size": 1, "max_spatial_size": -1, "prob": 1.0},
+        {"img": np.arange(16).reshape((2, 2, 2, 2))},
+        np.asarray([[[[6, 1], [4, 3]], [[0, 2], [7, 5]]], [[[14, 10], [9, 8]], [[12, 15], [13, 11]]]]),
     ],
 ]
 
 
-class TestLocalPatchShuffle(unittest.TestCase):
+class TestRandCoarseShuffle(unittest.TestCase):
     @parameterized.expand(TEST_CASES)
-    def test_local_patch_shuffle(self, input_param, input_data, expected_val):
-        g = LocalPatchShuffling(**input_param)
+    def test_shuffle(self, input_param, input_data, expected_val):
+        g = RandCoarseShuffle(**input_param)
         g.set_random_state(seed=12)
         result = g(**input_data)
         np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4)
diff --git a/tests/test_rand_coarse_shuffled.py b/tests/test_rand_coarse_shuffled.py
new file mode 100644
index 0000000000..d2845fdaae
--- /dev/null
+++ b/tests/test_rand_coarse_shuffled.py
@@ -0,0 +1,56 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+from parameterized import parameterized
+
+from monai.transforms import RandCoarseShuffled
+
+TEST_CASES = [
+    [
+        {"keys": "img", "holes": 5, "spatial_size": 1, "max_spatial_size": -1, "prob": 0.0},
+        {"img": np.arange(8).reshape((1, 2, 2, 2))},
+        np.arange(8).reshape((1, 2, 2, 2)),
+    ],
+    [
+        {"keys": "img", "holes": 10, "spatial_size": 1, "max_spatial_size": -1, "prob": 1.0},
+        {"img": np.arange(27).reshape((1, 3, 3, 3))},
+        np.asarray(
+            [
+                [
+                    [[13, 17, 5], [6, 16, 25], [12, 15, 22]],
+                    [[24, 7, 3], [9, 2, 23], [0, 4, 26]],
+                    [[19, 11, 14], [1, 20, 8], [18, 10, 21]],
+                ]
+            ]
+        ),
+    ],
+    [
+        {"keys": "img", "holes": 2, "spatial_size": 1, "max_spatial_size": -1, "prob": 1.0},
+        {"img": np.arange(16).reshape((2, 2, 2, 2))},
+        np.asarray([[[[7, 2], [1, 4]], [[5, 0], [3, 6]]], [[[8, 13], [10, 15]], [[14, 12], [11, 9]]]]),
+    ],
+]
+
+
+class TestRandCoarseShuffled(unittest.TestCase):
+    @parameterized.expand(TEST_CASES)
+    def test_shuffle(self, input_param, input_data, expected_val):
+        g = RandCoarseShuffled(**input_param)
+        g.set_random_state(seed=12)
+        result = g(input_data)
+        np.testing.assert_allclose(result["img"], expected_val, rtol=1e-4, atol=1e-4)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_rand_crop_by_label_classes.py b/tests/test_rand_crop_by_label_classes.py
index b21f971042..d562a44a6d 100644
--- a/tests/test_rand_crop_by_label_classes.py
+++ b/tests/test_rand_crop_by_label_classes.py
@@ -15,68 +15,77 @@
 from parameterized import parameterized
 
 from monai.transforms import ClassesToIndices, RandCropByLabelClasses
+from tests.utils import TEST_NDARRAYS
 
-TEST_CASE_0 = [
+TESTS_INDICES, TESTS_SHAPE = [], []
+for p in TEST_NDARRAYS:
     # One-Hot label
-    {
-        "label": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "num_classes": None,
-        "spatial_size": [2, 2, -1],
-        "ratios": [1, 1, 1],
-        "num_samples": 2,
-        "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "image_threshold": 0,
-    },
-    {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])},
-    list,
-    (3, 2, 2, 3),
-]
+    TESTS_INDICES.append(
+        [
+            {
+                "label": p(np.random.randint(0, 2, size=[3, 3, 3, 3])),
+                "num_classes": None,
+                "spatial_size": [2, 2, -1],
+                "ratios": [1, 1, 1],
+                "num_samples": 2,
+                "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])),
+                "image_threshold": 0,
+            },
+            {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))},
+            list,
+            (3, 2, 2, 3),
+        ]
+    )
 
-TEST_CASE_1 = [
-    # Argmax label
-    {
-        "label": np.random.randint(0, 2, size=[1, 3, 3, 3]),
-        "num_classes": 2,
-        "spatial_size": [2, 2, 2],
-        "ratios": [1, 1],
-        "num_samples": 2,
-        "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "image_threshold": 0,
-    },
-    {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])},
-    list,
-    (3, 2, 2, 2),
-]
+    TESTS_INDICES.append(
+        [
+            # Argmax label
+            {
+                "label": p(np.random.randint(0, 2, size=[1, 3, 3, 3])),
+                "num_classes": 2,
+                "spatial_size": [2, 2, 2],
+                "ratios": [1, 1],
+                "num_samples": 2,
+                "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])),
+                "image_threshold": 0,
+            },
+            {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))},
+            list,
+            (3, 2, 2, 2),
+        ]
+    )
 
-TEST_CASE_2 = [
-    # provide label at runtime
-    {
-        "label": None,
-        "num_classes": 2,
-        "spatial_size": [2, 2, 2],
-        "ratios": [1, 1],
-        "num_samples": 2,
-        "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "image_threshold": 0,
-    },
-    {
-        "img": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "label": np.random.randint(0, 2, size=[1, 3, 3, 3]),
-        "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-    },
-    list,
-    (3, 2, 2, 2),
-]
+    TESTS_SHAPE.append(
+        [
+            # provide label at runtime
+            {
+                "label": None,
+                "num_classes": 2,
+                "spatial_size": [2, 2, 2],
+                "ratios": [1, 1],
+                "num_samples": 2,
+                "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])),
+                "image_threshold": 0,
+            },
+            {
+                "img": p(np.random.randint(0, 2, size=[3, 3, 3, 3])),
+                "label": p(np.random.randint(0, 2, size=[1, 3, 3, 3])),
+                "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])),
+            },
+            list,
+            (3, 2, 2, 2),
+        ]
+    )
 
 
 class TestRandCropByLabelClasses(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2])
+    @parameterized.expand(TESTS_INDICES + TESTS_SHAPE)
     def test_type_shape(self, input_param, input_data, expected_type, expected_shape):
         result = RandCropByLabelClasses(**input_param)(**input_data)
         self.assertIsInstance(result, expected_type)
         self.assertTupleEqual(result[0].shape, expected_shape)
 
-    @parameterized.expand([TEST_CASE_0, TEST_CASE_1])
+    @parameterized.expand(TESTS_INDICES)
     def test_indices(self, input_param, input_data, expected_type, expected_shape):
         input_param["indices"] = ClassesToIndices(num_classes=input_param["num_classes"])(input_param["label"])
         result = RandCropByLabelClasses(**input_param)(**input_data)
diff --git a/tests/test_rand_crop_by_label_classesd.py b/tests/test_rand_crop_by_label_classesd.py
index 829096953b..27fe3425dd 100644
--- a/tests/test_rand_crop_by_label_classesd.py
+++ b/tests/test_rand_crop_by_label_classesd.py
@@ -15,52 +15,59 @@
 from parameterized import parameterized
 
 from monai.transforms import ClassesToIndicesd, RandCropByLabelClassesd
+from tests.utils import TEST_NDARRAYS
 
-TEST_CASE_0 = [
-    # One-Hot label
-    {
-        "keys": "img",
-        "label_key": "label",
-        "num_classes": None,
-        "spatial_size": [2, 2, -1],
-        "ratios": [1, 1, 1],
-        "num_samples": 2,
-        "image_key": "image",
-        "image_threshold": 0,
-    },
-    {
-        "img": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "label": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-    },
-    list,
-    (3, 2, 2, 3),
-]
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
+        [
+            # One-Hot label
+            {
+                "keys": "img",
+                "label_key": "label",
+                "num_classes": None,
+                "spatial_size": [2, 2, -1],
+                "ratios": [1, 1, 1],
+                "num_samples": 2,
+                "image_key": "image",
+                "image_threshold": 0,
+            },
+            {
+                "img": p(np.random.randint(0, 2, size=[3, 3, 3, 3])),
+                "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])),
+                "label": p(np.random.randint(0, 2, size=[3, 3, 3, 3])),
+            },
+            list,
+            (3, 2, 2, 3),
+        ]
+    )
 
-TEST_CASE_1 = [
-    # Argmax label
-    {
-        "keys": "img",
-        "label_key": "label",
-        "num_classes": 2,
-        "spatial_size": [2, 2, 2],
-        "ratios": [1, 1],
-        "num_samples": 2,
-        "image_key": "image",
-        "image_threshold": 0,
-    },
-    {
-        "img": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "label": np.random.randint(0, 2, size=[1, 3, 3, 3]),
-    },
-    list,
-    (3, 2, 2, 2),
-]
+    TESTS.append(
+        [
+            # Argmax label
+            {
+                "keys": "img",
+                "label_key": "label",
+                "num_classes": 2,
+                "spatial_size": [2, 2, 2],
+                "ratios": [1, 1],
+                "num_samples": 2,
+                "image_key": "image",
+                "image_threshold": 0,
+            },
+            {
+                "img": p(np.random.randint(0, 2, size=[3, 3, 3, 3])),
+                "image": p(np.random.randint(0, 2, size=[3, 3, 3, 3])),
+                "label": p(np.random.randint(0, 2, size=[1, 3, 3, 3])),
+            },
+            list,
+            (3, 2, 2, 2),
+        ]
+    )
 
 
 class TestRandCropByLabelClassesd(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_0, TEST_CASE_1])
+    @parameterized.expand(TESTS)
     def test_type_shape(self, input_param, input_data, expected_type, expected_shape):
         result = RandCropByLabelClassesd(**input_param)(input_data)
         self.assertIsInstance(result, expected_type)
diff --git a/tests/test_rand_crop_by_pos_neg_label.py b/tests/test_rand_crop_by_pos_neg_label.py
index e0f669ab3f..a81976dea1 100644
--- a/tests/test_rand_crop_by_pos_neg_label.py
+++ b/tests/test_rand_crop_by_pos_neg_label.py
@@ -10,68 +10,93 @@
 # limitations under the License.
 
 import unittest
+from copy import deepcopy
 
 import numpy as np
 from parameterized import parameterized
 
 from monai.transforms import RandCropByPosNegLabel
+from tests.utils import TEST_NDARRAYS
 
-TEST_CASE_0 = [
-    {
-        "label": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "spatial_size": [2, 2, -1],
-        "pos": 1,
-        "neg": 1,
-        "num_samples": 2,
-        "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "image_threshold": 0,
-    },
-    {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])},
-    list,
-    (3, 2, 2, 3),
-]
+TESTS = []
+TESTS.append(
+    [
+        {
+            "label": np.random.randint(0, 2, size=[3, 3, 3, 3]),
+            "spatial_size": [2, 2, -1],
+            "pos": 1,
+            "neg": 1,
+            "num_samples": 2,
+            "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
+            "image_threshold": 0,
+        },
+        {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])},
+        (3, 2, 2, 3),
+    ]
+)
+TESTS.append(
+    [
+        {
+            "label": np.random.randint(0, 2, size=[3, 3, 3, 3]),
+            "spatial_size": [2, 2, 2],
+            "pos": 1,
+            "neg": 1,
+            "num_samples": 2,
+            "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
+            "image_threshold": 0,
+        },
+        {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])},
+        (3, 2, 2, 2),
+    ]
+)
+TESTS.append(
+    [
+        {
+            "label": None,
+            "spatial_size": [2, 2, 2],
+            "pos": 1,
+            "neg": 1,
+            "num_samples": 2,
+            "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
+            "image_threshold": 0,
+        },
+        {
+            "img": np.random.randint(0, 2, size=[3, 3, 3, 3]),
+            "label": np.random.randint(0, 2, size=[3, 3, 3, 3]),
+            "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
+        },
+        (3, 2, 2, 2),
+    ]
+)
 
-TEST_CASE_1 = [
-    {
-        "label": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "spatial_size": [2, 2, 2],
-        "pos": 1,
-        "neg": 1,
-        "num_samples": 2,
-        "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "image_threshold": 0,
-    },
-    {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])},
-    list,
-    (3, 2, 2, 2),
-]
 
-TEST_CASE_2 = [
-    {
-        "label": None,
-        "spatial_size": [2, 2, 2],
-        "pos": 1,
-        "neg": 1,
-        "num_samples": 2,
-        "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "image_threshold": 0,
-    },
-    {
-        "img": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "label": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-    },
-    list,
-    (3, 2, 2, 2),
-]
+class TestRandCropByPosNegLabel(unittest.TestCase):
+    @staticmethod
+    def convert_data_type(im_type, d, keys=("img", "image", "label")):
+        out = deepcopy(d)
+        for k, v in out.items():
+            if k in keys and isinstance(v, np.ndarray):
+                out[k] = im_type(v)
+        return out
 
+    @parameterized.expand(TESTS)
+    def test_type_shape(self, input_param, input_data, expected_shape):
+        results = []
+        for p in TEST_NDARRAYS:
+            input_param_mod = self.convert_data_type(p, input_param)
+            input_data_mod = self.convert_data_type(p, input_data)
+            cropper = RandCropByPosNegLabel(**input_param_mod)
+            cropper.set_random_state(0)
+            result = cropper(**input_data_mod)
 
-class TestRandCropByPosNegLabel(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2])
-    def test_type_shape(self, input_param, input_data, expected_type, expected_shape):
-        result = RandCropByPosNegLabel(**input_param)(**input_data)
-        self.assertIsInstance(result, expected_type)
-        self.assertTupleEqual(result[0].shape, expected_shape)
+            self.assertIsInstance(result, list)
+            self.assertTupleEqual(result[0].shape, expected_shape)
+
+            # check for same results across numpy, torch.Tensor and torch.cuda.Tensor
+            result = np.asarray([i if isinstance(i, np.ndarray) else i.cpu().numpy() for i in result])
+            results.append(np.asarray(result))
+            if len(results) > 1:
+                np.testing.assert_allclose(results[0], results[-1])
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_crop_by_pos_neg_labeld.py b/tests/test_rand_crop_by_pos_neg_labeld.py
index 17a3e117bb..6d2f39cc54 100644
--- a/tests/test_rand_crop_by_pos_neg_labeld.py
+++ b/tests/test_rand_crop_by_pos_neg_labeld.py
@@ -10,90 +10,101 @@
 # limitations under the License.
 
 import unittest
+from copy import deepcopy
 
 import numpy as np
 from parameterized import parameterized
 
 from monai.transforms import RandCropByPosNegLabeld
+from tests.utils import TEST_NDARRAYS
 
-TEST_CASE_0 = [
-    {
-        "keys": ["image", "extra", "label"],
-        "label_key": "label",
-        "spatial_size": [-1, 2, 2],
-        "pos": 1,
-        "neg": 1,
-        "num_samples": 2,
-        "image_key": None,
-        "image_threshold": 0,
-    },
-    {
-        "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "extra": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "label": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "image_meta_dict": {"affine": np.eye(3), "shape": "CHWD"},
-    },
-    list,
-    (3, 3, 2, 2),
+TESTS = [
+    [
+        {
+            "keys": ["image", "extra", "label"],
+            "label_key": "label",
+            "spatial_size": [-1, 2, 2],
+            "pos": 1,
+            "neg": 1,
+            "num_samples": 2,
+            "image_key": None,
+            "image_threshold": 0,
+        },
+        {
+            "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
+            "extra": np.random.randint(0, 2, size=[3, 3, 3, 3]),
+            "label": np.random.randint(0, 2, size=[3, 3, 3, 3]),
+            "image_meta_dict": {"affine": np.eye(3), "shape": "CHWD"},
+        },
+        (3, 3, 2, 2),
+    ],
+    [
+        {
+            "keys": ["image", "extra", "label"],
+            "label_key": "label",
+            "spatial_size": [2, 2, 2],
+            "pos": 1,
+            "neg": 1,
+            "num_samples": 2,
+            "image_key": None,
+            "image_threshold": 0,
+        },
+        {
+            "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
+            "extra": np.random.randint(0, 2, size=[3, 3, 3, 3]),
+            "label": np.random.randint(0, 2, size=[3, 3, 3, 3]),
+            "label_meta_dict": {"affine": np.eye(3), "shape": "CHWD"},
+        },
+        (3, 2, 2, 2),
+    ],
+    [
+        {
+            "keys": ["image", "extra", "label"],
+            "label_key": "label",
+            "spatial_size": [2, 2, 2],
+            "pos": 1,
+            "neg": 1,
+            "num_samples": 2,
+            "image_key": None,
+            "image_threshold": 0,
+        },
+        {
+            "image": np.zeros([3, 3, 3, 3]) - 1,
+            "extra": np.zeros([3, 3, 3, 3]),
+            "label": np.ones([3, 3, 3, 3]),
+            "extra_meta_dict": {"affine": np.eye(3), "shape": "CHWD"},
+        },
+        (3, 2, 2, 2),
+    ],
 ]
 
-TEST_CASE_1 = [
-    {
-        "keys": ["image", "extra", "label"],
-        "label_key": "label",
-        "spatial_size": [2, 2, 2],
-        "pos": 1,
-        "neg": 1,
-        "num_samples": 2,
-        "image_key": None,
-        "image_threshold": 0,
-    },
-    {
-        "image": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "extra": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "label": np.random.randint(0, 2, size=[3, 3, 3, 3]),
-        "label_meta_dict": {"affine": np.eye(3), "shape": "CHWD"},
-    },
-    list,
-    (3, 2, 2, 2),
-]
 
-TEST_CASE_2 = [
-    {
-        "keys": ["image", "extra", "label"],
-        "label_key": "label",
-        "spatial_size": [2, 2, 2],
-        "pos": 1,
-        "neg": 1,
-        "num_samples": 2,
-        "image_key": None,
-        "image_threshold": 0,
-    },
-    {
-        "image": np.zeros([3, 3, 3, 3]) - 1,
-        "extra": np.zeros([3, 3, 3, 3]),
-        "label": np.ones([3, 3, 3, 3]),
-        "extra_meta_dict": {"affine": np.eye(3), "shape": "CHWD"},
-    },
-    list,
-    (3, 2, 2, 2),
-]
+class TestRandCropByPosNegLabeld(unittest.TestCase):
+    @staticmethod
+    def convert_data_type(im_type, d, keys=("img", "image", "label")):
+        out = deepcopy(d)
+        for k, v in out.items():
+            if k in keys and isinstance(v, np.ndarray):
+                out[k] = im_type(v)
+        return out
 
+    @parameterized.expand(TESTS)
+    def test_type_shape(self, input_param, input_data, expected_shape):
+        for p in TEST_NDARRAYS:
+            input_param_mod = self.convert_data_type(p, input_param)
+            input_data_mod = self.convert_data_type(p, input_data)
+            cropper = RandCropByPosNegLabeld(**input_param_mod)
+            cropper.set_random_state(0)
+            result = cropper(input_data_mod)
 
-class TestRandCropByPosNegLabeld(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2])
-    def test_type_shape(self, input_param, input_data, expected_type, expected_shape):
-        result = RandCropByPosNegLabeld(**input_param)(input_data)
-        self.assertIsInstance(result, expected_type)
-        self.assertTupleEqual(result[0]["image"].shape, expected_shape)
-        self.assertTupleEqual(result[0]["extra"].shape, expected_shape)
-        self.assertTupleEqual(result[0]["label"].shape, expected_shape)
-        _len = len(tuple(input_data.keys()))
-        self.assertTupleEqual(tuple(result[0].keys())[:_len], tuple(input_data.keys()))
-        for i, item in enumerate(result):
-            self.assertEqual(item["image_meta_dict"]["patch_index"], i)
-            self.assertEqual(item["label_meta_dict"]["patch_index"], i)
-            self.assertEqual(item["extra_meta_dict"]["patch_index"], i)
+            self.assertIsInstance(result, list)
+
+            _len = len(tuple(input_data.keys()))
+            self.assertTupleEqual(tuple(result[0].keys())[:_len], tuple(input_data.keys()))
+            for k in ("image", "extra", "label"):
+                self.assertTupleEqual(result[0][k].shape, expected_shape)
+                for i, item in enumerate(result):
+                    self.assertEqual(item[k + "_meta_dict"]["patch_index"], i)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_cucim_dict_transform.py b/tests/test_rand_cucim_dict_transform.py
new file mode 100644
index 0000000000..c084331e0e
--- /dev/null
+++ b/tests/test_rand_cucim_dict_transform.py
@@ -0,0 +1,185 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+from parameterized import parameterized
+
+from monai.transforms import RandCuCIMd
+from monai.utils import optional_import, set_determinism
+from tests.utils import skip_if_no_cuda
+
+_, has_cut = optional_import("cucim.core.operations.expose.transform")
+cp, has_cp = optional_import("cupy")
+
+set_determinism(seed=0)
+
+TEST_CASE_COLOR_JITTER_1 = [
+    {"name": "color_jitter", "brightness": 0.0, "contrast": 0.0, "saturation": 0.0, "hue": 0.0},
+    np.array([[[0, 1], [2, 3]], [[0, 10], [20, 30]], [[0, 50], [100, 150]]], dtype=np.uint8),
+    np.array([[[0, 1], [2, 3]], [[0, 10], [20, 30]], [[0, 50], [100, 150]]], dtype=np.uint8),
+]
+
+TEST_CASE_FLIP_1 = [
+    {"name": "image_flip", "spatial_axis": -1},
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+    np.array([[[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]]], dtype=np.float32),
+]
+
+TEST_CASE_RAND_ROTATE_1 = [
+    {"name": "rand_image_rotate_90", "prob": 1.0, "max_k": 1, "spatial_axis": (-2, -1)},
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+    np.array([[[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]]], dtype=np.float32),
+]
+
+
+TEST_CASE_RAND_ROTATE_2 = [
+    {"name": "rand_image_rotate_90", "prob": 0.0, "max_k": 1, "spatial_axis": (-2, -1)},
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+]
+
+TEST_CASE_SCALE_INTENSITY_1 = [
+    {"name": "scale_intensity_range", "a_min": 0.0, "a_max": 4.0, "b_min": 0.0, "b_max": 1.0, "clip": False},
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+    np.array([[[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]]], dtype=np.float32),
+]
+
+TEST_CASE_ZOOM_1 = [
+    {"name": "zoom", "zoom_factor": (0.5, 0.5)},
+    np.mgrid[:3, 1:4].astype(dtype=np.float32),
+    np.concatenate([np.ones((1, 3, 3), dtype=np.float32) * 1.0, np.ones((1, 3, 3), dtype=np.float32) * 2.0]),
+]
+
+TEST_CASE_RAND_ZOOM_1 = [
+    {"name": "rand_zoom", "prob": 1.0, "min_zoom": 0.5, "max_zoom": 0.5},
+    np.mgrid[:3, 1:4].astype(dtype=np.float32),
+    np.concatenate([np.ones((1, 3, 3), dtype=np.float32) * 1.0, np.ones((1, 3, 3), dtype=np.float32) * 2.0]),
+]
+
+TEST_CASE_RAND_ZOOM_2 = [
+    {"name": "rand_zoom", "prob": 0.0, "min_zoom": 0.5, "max_zoom": 0.5},
+    np.mgrid[:3, 1:4].astype(dtype=np.float32),
+    np.mgrid[:3, 1:4].astype(dtype=np.float32),
+]
+
+
+@skip_if_no_cuda
+@unittest.skipUnless(has_cp, "CuPy is required.")
+@unittest.skipUnless(has_cut, "cuCIM transforms are required.")
+class TestRandCuCIMDict(unittest.TestCase):
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_RAND_ROTATE_1,
+            TEST_CASE_RAND_ROTATE_2,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_2,
+        ]
+    )
+    def test_tramsforms_numpy_single(self, params, input, expected):
+        input = {"image": input}
+        # apply_prob=1.0
+        output = RandCuCIMd(keys="image", apply_prob=1.0, **params)(input)["image"]
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, expected)
+        # apply_prob=0.0
+        output = RandCuCIMd(keys="image", apply_prob=0.0, **params)(input)["image"]
+        self.assertTrue(output.dtype == input["image"].dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, input["image"])
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_RAND_ROTATE_1,
+            TEST_CASE_RAND_ROTATE_2,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_2,
+        ]
+    )
+    def test_tramsforms_numpy_batch(self, params, input, expected):
+        input = {"image": input[cp.newaxis, ...]}
+        expected = expected[cp.newaxis, ...]
+        # apply_prob=1.0
+        output = RandCuCIMd(keys="image", apply_prob=1.0, **params)(input)["image"]
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, expected)
+        # apply_prob=0.0
+        output = RandCuCIMd(keys="image", apply_prob=0.0, **params)(input)["image"]
+        self.assertTrue(output.dtype == input["image"].dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, input["image"])
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_RAND_ROTATE_1,
+            TEST_CASE_RAND_ROTATE_2,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_2,
+        ]
+    )
+    def test_tramsforms_cupy_single(self, params, input, expected):
+        input = {"image": cp.asarray(input)}
+        expected = cp.asarray(expected)
+        # apply_prob=1.0
+        output = RandCuCIMd(keys="image", apply_prob=1.0, **params)(input)["image"]
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, expected)
+        # apply_prob=0.0
+        output = RandCuCIMd(keys="image", apply_prob=0.0, **params)(input)["image"]
+        self.assertTrue(output.dtype == input["image"].dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, input["image"])
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_RAND_ROTATE_1,
+            TEST_CASE_RAND_ROTATE_2,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_2,
+        ]
+    )
+    def test_tramsforms_cupy_batch(self, params, input, expected):
+        input = {"image": cp.asarray(input)[cp.newaxis, ...]}
+        expected = cp.asarray(expected)[cp.newaxis, ...]
+        # apply_prob=1.0
+        output = RandCuCIMd(keys="image", **params)(input)["image"]
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, expected)
+        # apply_prob=0.0
+        output = RandCuCIMd(keys="image", apply_prob=0.0, **params)(input)["image"]
+        self.assertTrue(output.dtype == input["image"].dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, input["image"])
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_rand_cucim_transform.py b/tests/test_rand_cucim_transform.py
new file mode 100644
index 0000000000..907bc35e01
--- /dev/null
+++ b/tests/test_rand_cucim_transform.py
@@ -0,0 +1,184 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+from parameterized import parameterized
+
+from monai.transforms import RandCuCIM
+from monai.utils import optional_import, set_determinism
+from tests.utils import skip_if_no_cuda
+
+_, has_cut = optional_import("cucim.core.operations.expose.transform")
+cp, has_cp = optional_import("cupy")
+
+set_determinism(seed=0)
+
+TEST_CASE_COLOR_JITTER_1 = [
+    {"name": "color_jitter", "brightness": 0.0, "contrast": 0.0, "saturation": 0.0, "hue": 0.0},
+    np.array([[[0, 1], [2, 3]], [[0, 10], [20, 30]], [[0, 50], [100, 150]]], dtype=np.uint8),
+    np.array([[[0, 1], [2, 3]], [[0, 10], [20, 30]], [[0, 50], [100, 150]]], dtype=np.uint8),
+]
+
+TEST_CASE_FLIP_1 = [
+    {"name": "image_flip", "spatial_axis": -1},
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+    np.array([[[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]], [[1.0, 0.0], [3.0, 2.0]]], dtype=np.float32),
+]
+
+TEST_CASE_RAND_ROTATE_1 = [
+    {"name": "rand_image_rotate_90", "prob": 1.0, "max_k": 1, "spatial_axis": (-2, -1)},
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+    np.array([[[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]], [[1.0, 3.0], [0.0, 2.0]]], dtype=np.float32),
+]
+
+
+TEST_CASE_RAND_ROTATE_2 = [
+    {"name": "rand_image_rotate_90", "prob": 0.0, "max_k": 1, "spatial_axis": (-2, -1)},
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+]
+
+TEST_CASE_SCALE_INTENSITY_1 = [
+    {"name": "scale_intensity_range", "a_min": 0.0, "a_max": 4.0, "b_min": 0.0, "b_max": 1.0, "clip": False},
+    np.array([[[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]], [[0.0, 1.0], [2.0, 3.0]]], dtype=np.float32),
+    np.array([[[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]], [[0.0, 0.25], [0.5, 0.75]]], dtype=np.float32),
+]
+
+TEST_CASE_ZOOM_1 = [
+    {"name": "zoom", "zoom_factor": (0.5, 0.5)},
+    np.mgrid[:3, 1:4].astype(dtype=np.float32),
+    np.concatenate([np.ones((1, 3, 3), dtype=np.float32) * 1.0, np.ones((1, 3, 3), dtype=np.float32) * 2.0]),
+]
+
+TEST_CASE_RAND_ZOOM_1 = [
+    {"name": "rand_zoom", "prob": 1.0, "min_zoom": 0.5, "max_zoom": 0.5},
+    np.mgrid[:3, 1:4].astype(dtype=np.float32),
+    np.concatenate([np.ones((1, 3, 3), dtype=np.float32) * 1.0, np.ones((1, 3, 3), dtype=np.float32) * 2.0]),
+]
+
+TEST_CASE_RAND_ZOOM_2 = [
+    {"name": "rand_zoom", "prob": 0.0, "min_zoom": 0.5, "max_zoom": 0.5},
+    np.mgrid[:3, 1:4].astype(dtype=np.float32),
+    np.mgrid[:3, 1:4].astype(dtype=np.float32),
+]
+
+
+@skip_if_no_cuda
+@unittest.skipUnless(has_cp, "CuPy is required.")
+@unittest.skipUnless(has_cut, "cuCIM transforms are required.")
+class TestRandCuCIM(unittest.TestCase):
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_RAND_ROTATE_1,
+            TEST_CASE_RAND_ROTATE_2,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_2,
+        ]
+    )
+    def test_tramsforms_numpy_single(self, params, input, expected):
+        # apply_prob=1.0
+        output = RandCuCIM(apply_prob=1.0, **params)(input)
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, expected)
+        # apply_prob=0.0
+        output = RandCuCIM(apply_prob=0.0, **params)(input)
+        self.assertTrue(output.dtype == input.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, input)
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_RAND_ROTATE_1,
+            TEST_CASE_RAND_ROTATE_2,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_2,
+        ]
+    )
+    def test_tramsforms_numpy_batch(self, params, input, expected):
+        input = input[cp.newaxis, ...]
+        expected = expected[cp.newaxis, ...]
+        # apply_prob=1.0
+        output = RandCuCIM(apply_prob=1.0, **params)(input)
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, expected)
+        # apply_prob=0.0
+        output = RandCuCIM(apply_prob=0.0, **params)(input)
+        self.assertTrue(output.dtype == input.dtype)
+        self.assertTrue(isinstance(output, np.ndarray))
+        cp.testing.assert_allclose(output, input)
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_RAND_ROTATE_1,
+            TEST_CASE_RAND_ROTATE_2,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_2,
+        ]
+    )
+    def test_tramsforms_cupy_single(self, params, input, expected):
+        input = cp.asarray(input)
+        expected = cp.asarray(expected)
+        # apply_prob=1.0
+        output = RandCuCIM(apply_prob=1.0, **params)(input)
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, expected)
+        # apply_prob=0.0
+        output = RandCuCIM(apply_prob=0.0, **params)(input)
+        self.assertTrue(output.dtype == input.dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, input)
+
+    @parameterized.expand(
+        [
+            TEST_CASE_COLOR_JITTER_1,
+            TEST_CASE_FLIP_1,
+            TEST_CASE_RAND_ROTATE_1,
+            TEST_CASE_RAND_ROTATE_2,
+            TEST_CASE_SCALE_INTENSITY_1,
+            TEST_CASE_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_1,
+            TEST_CASE_RAND_ZOOM_2,
+        ]
+    )
+    def test_tramsforms_cupy_batch(self, params, input, expected):
+        input = cp.asarray(input)[cp.newaxis, ...]
+        expected = cp.asarray(expected)[cp.newaxis, ...]
+        # apply_prob=1.0
+        output = RandCuCIM(**params)(input)
+        self.assertTrue(output.dtype == expected.dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, expected)
+        # apply_prob=0.0
+        output = RandCuCIM(apply_prob=0.0, **params)(input)
+        self.assertTrue(output.dtype == input.dtype)
+        self.assertTrue(isinstance(output, cp.ndarray))
+        cp.testing.assert_allclose(output, input)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_rand_deform_grid.py b/tests/test_rand_deform_grid.py
index 7c12c263d2..4725e28339 100644
--- a/tests/test_rand_deform_grid.py
+++ b/tests/test_rand_deform_grid.py
@@ -12,10 +12,10 @@
 import unittest
 
 import numpy as np
-import torch
 from parameterized import parameterized
 
 from monai.transforms import RandDeformGrid
+from tests.utils import assert_allclose
 
 TEST_CASES = [
     [
@@ -129,11 +129,7 @@ def test_rand_deform_grid(self, input_param, input_data, expected_val):
         g = RandDeformGrid(**input_param)
         g.set_random_state(123)
         result = g(**input_data)
-        self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor))
-        if isinstance(result, torch.Tensor):
-            np.testing.assert_allclose(result.cpu().numpy(), expected_val.cpu().numpy(), rtol=1e-4, atol=1e-4)
-        else:
-            np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4)
+        assert_allclose(result, expected_val, type_test=False, rtol=1e-3, atol=1e-3)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_elastic_2d.py b/tests/test_rand_elastic_2d.py
index fbfb7d5761..22920d0f35 100644
--- a/tests/test_rand_elastic_2d.py
+++ b/tests/test_rand_elastic_2d.py
@@ -16,90 +16,103 @@
 from parameterized import parameterized
 
 from monai.transforms import Rand2DElastic
+from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env
 
-TEST_CASES = [
-    [
-        {"spacing": (0.3, 0.3), "magnitude_range": (1.0, 2.0), "prob": 0.0, "as_tensor_output": False, "device": None},
-        {"img": torch.ones((3, 3, 3)), "spatial_size": (2, 2)},
-        np.ones((3, 2, 2)),
-    ],
-    [
-        {"spacing": (0.3, 0.3), "magnitude_range": (1.0, 2.0), "prob": 0.0, "as_tensor_output": False, "device": None},
-        {"img": torch.arange(27).reshape((3, 3, 3))},
-        np.arange(27).reshape((3, 3, 3)),
-    ],
-    [
-        {
-            "spacing": (0.3, 0.3),
-            "magnitude_range": (1.0, 2.0),
-            "prob": 0.9,
-            "as_tensor_output": False,
-            "device": None,
-            "padding_mode": "zeros",
-        },
-        {"img": torch.ones((3, 3, 3)), "spatial_size": (2, 2), "mode": "bilinear"},
-        np.array(
+_rtol = 5e-3 if is_tf32_env() else 1e-4
+
+TESTS = []
+for p in TEST_NDARRAYS:
+    for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]:
+        TESTS.append(
+            [
+                {"spacing": (0.3, 0.3), "magnitude_range": (1.0, 2.0), "prob": 0.0, "device": device},
+                {"img": p(torch.ones((3, 3, 3))), "spatial_size": (2, 2)},
+                p(np.ones((3, 2, 2))),
+            ]
+        )
+        TESTS.append(
+            [
+                {"spacing": (0.3, 0.3), "magnitude_range": (1.0, 2.0), "prob": 0.0, "device": device},
+                {"img": p(torch.arange(27).reshape((3, 3, 3)))},
+                p(np.arange(27).reshape((3, 3, 3))),
+            ]
+        )
+        TESTS.append(
             [
-                [[0.45531988, 0.0], [0.0, 0.71558857]],
-                [[0.45531988, 0.0], [0.0, 0.71558857]],
-                [[0.45531988, 0.0], [0.0, 0.71558857]],
+                {
+                    "spacing": (0.3, 0.3),
+                    "magnitude_range": (1.0, 2.0),
+                    "prob": 0.9,
+                    "device": device,
+                    "padding_mode": "zeros",
+                },
+                {"img": p(torch.ones((3, 3, 3))), "spatial_size": (2, 2), "mode": "bilinear"},
+                p(
+                    np.array(
+                        [
+                            [[0.45531988, 0.0], [0.0, 0.71558857]],
+                            [[0.45531988, 0.0], [0.0, 0.71558857]],
+                            [[0.45531988, 0.0], [0.0, 0.71558857]],
+                        ]
+                    )
+                ),
             ]
-        ),
-    ],
-    [
-        {
-            "spacing": (1.0, 1.0),
-            "magnitude_range": (1.0, 1.0),
-            "scale_range": [1.2, 2.2],
-            "prob": 0.9,
-            "padding_mode": "border",
-            "as_tensor_output": True,
-            "device": None,
-            "spatial_size": (2, 2),
-        },
-        {"img": torch.arange(27).reshape((3, 3, 3))},
-        torch.tensor(
+        )
+        TESTS.append(
             [
-                [[3.0793, 2.6141], [4.0568, 5.9978]],
-                [[12.0793, 11.6141], [13.0568, 14.9978]],
-                [[21.0793, 20.6141], [22.0568, 23.9978]],
+                {
+                    "spacing": (1.0, 1.0),
+                    "magnitude_range": (1.0, 1.0),
+                    "scale_range": [1.2, 2.2],
+                    "prob": 0.9,
+                    "padding_mode": "border",
+                    "device": device,
+                    "spatial_size": (2, 2),
+                },
+                {"img": p(torch.arange(27).reshape((3, 3, 3)))},
+                p(
+                    torch.tensor(
+                        [
+                            [[3.0793, 2.6141], [4.0568, 5.9978]],
+                            [[12.0793, 11.6141], [13.0568, 14.9978]],
+                            [[21.0793, 20.6141], [22.0568, 23.9978]],
+                        ]
+                    )
+                ),
             ]
-        ),
-    ],
-    [
-        {
-            "spacing": (0.3, 0.3),
-            "magnitude_range": (0.1, 0.2),
-            "translate_range": [-0.01, 0.01],
-            "scale_range": [0.01, 0.02],
-            "prob": 0.9,
-            "as_tensor_output": False,
-            "device": "cuda" if torch.cuda.is_available() else "cpu",
-            "spatial_size": (2, 2),
-        },
-        {"img": torch.arange(27).reshape((3, 3, 3))},
-        np.array(
+        )
+        TESTS.append(
             [
-                [[1.3584113, 1.9251312], [5.626623, 6.642721]],
-                [[10.358411, 10.925131], [14.626623, 15.642721]],
-                [[19.358412, 19.92513], [23.626623, 24.642721]],
+                {
+                    "spacing": (0.3, 0.3),
+                    "magnitude_range": (0.1, 0.2),
+                    "translate_range": [-0.01, 0.01],
+                    "scale_range": [0.01, 0.02],
+                    "prob": 0.9,
+                    "device": device,
+                    "spatial_size": (2, 2),
+                },
+                {"img": p(torch.arange(27).reshape((3, 3, 3)))},
+                p(
+                    np.array(
+                        [
+                            [[1.3584113, 1.9251312], [5.626623, 6.642721]],
+                            [[10.358411, 10.925131], [14.626623, 15.642721]],
+                            [[19.358412, 19.92513], [23.626623, 24.642721]],
+                        ]
+                    )
+                ),
             ]
-        ),
-    ],
-]
+        )
 
 
 class TestRand2DElastic(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_rand_2d_elastic(self, input_param, input_data, expected_val):
         g = Rand2DElastic(**input_param)
         g.set_random_state(123)
         result = g(**input_data)
-        self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor))
-        if isinstance(result, torch.Tensor):
-            np.testing.assert_allclose(result.cpu().numpy(), expected_val.cpu().numpy(), rtol=1e-4, atol=1e-4)
-        else:
-            np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4)
+        assert_allclose(result, expected_val, rtol=_rtol, atol=1e-4)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_elastic_3d.py b/tests/test_rand_elastic_3d.py
index c63282d571..d44324746f 100644
--- a/tests/test_rand_elastic_3d.py
+++ b/tests/test_rand_elastic_3d.py
@@ -16,69 +16,89 @@
 from parameterized import parameterized
 
 from monai.transforms import Rand3DElastic
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASES = [
-    [
-        {
-            "magnitude_range": (0.3, 2.3),
-            "sigma_range": (1.0, 20.0),
-            "prob": 0.0,
-            "as_tensor_output": False,
-            "device": None,
-            "spatial_size": -1,
-        },
-        {"img": torch.arange(72).reshape((2, 3, 3, 4))},
-        np.arange(72).reshape((2, 3, 3, 4)),
-    ],
-    [
-        {
-            "magnitude_range": (0.3, 2.3),
-            "sigma_range": (1.0, 20.0),
-            "prob": 0.0,
-            "as_tensor_output": False,
-            "device": None,
-        },
-        {"img": torch.ones((2, 3, 3, 3)), "spatial_size": (2, 2, 2)},
-        np.ones((2, 2, 2, 2)),
-    ],
-    [
-        {
-            "magnitude_range": (0.3, 0.3),
-            "sigma_range": (1.0, 2.0),
-            "prob": 0.9,
-            "as_tensor_output": False,
-            "device": None,
-        },
-        {"img": torch.arange(27).reshape((1, 3, 3, 3)), "spatial_size": (2, 2, 2)},
-        np.array([[[[6.4939356, 7.50289], [9.518351, 10.522849]], [[15.512375, 16.523542], [18.531467, 19.53646]]]]),
-    ],
-    [
-        {
-            "magnitude_range": (0.3, 0.3),
-            "sigma_range": (1.0, 2.0),
-            "prob": 0.9,
-            "rotate_range": [1, 1, 1],
-            "as_tensor_output": False,
-            "device": "cuda" if torch.cuda.is_available() else "cpu",
-            "spatial_size": (2, 2, 2),
-        },
-        {"img": torch.arange(27).reshape((1, 3, 3, 3)), "mode": "bilinear"},
-        np.array([[[[5.0069294, 9.463932], [9.287769, 13.739735]], [[12.319424, 16.777205], [16.594296, 21.045748]]]]),
-    ],
-]
+TESTS = []
+for p in TEST_NDARRAYS:
+    for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]:
+        TESTS.append(
+            [
+                {
+                    "magnitude_range": (0.3, 2.3),
+                    "sigma_range": (1.0, 20.0),
+                    "prob": 0.0,
+                    "device": device,
+                    "spatial_size": -1,
+                },
+                {"img": p(torch.arange(72).reshape((2, 3, 3, 4)))},
+                p(np.arange(72).reshape((2, 3, 3, 4))),
+            ]
+        )
+        TESTS.append(
+            [
+                {
+                    "magnitude_range": (0.3, 2.3),
+                    "sigma_range": (1.0, 20.0),
+                    "prob": 0.0,
+                    "device": device,
+                },
+                {"img": p(torch.ones((2, 3, 3, 3))), "spatial_size": (2, 2, 2)},
+                p(np.ones((2, 2, 2, 2))),
+            ]
+        )
+        TESTS.append(
+            [
+                {
+                    "magnitude_range": (0.3, 0.3),
+                    "sigma_range": (1.0, 2.0),
+                    "prob": 0.9,
+                    "device": device,
+                },
+                {"img": p(torch.arange(27).reshape((1, 3, 3, 3))), "spatial_size": (2, 2, 2)},
+                p(
+                    np.array(
+                        [
+                            [
+                                [[6.4939356, 7.50289], [9.518351, 10.522849]],
+                                [[15.512375, 16.523542], [18.531467, 19.53646]],
+                            ]
+                        ]
+                    )
+                ),
+            ]
+        )
+        TESTS.append(
+            [
+                {
+                    "magnitude_range": (0.3, 0.3),
+                    "sigma_range": (1.0, 2.0),
+                    "prob": 0.9,
+                    "rotate_range": [1, 1, 1],
+                    "device": device,
+                    "spatial_size": (2, 2, 2),
+                },
+                {"img": p(torch.arange(27).reshape((1, 3, 3, 3))), "mode": "bilinear"},
+                p(
+                    np.array(
+                        [
+                            [
+                                [[5.0069294, 9.463932], [9.287769, 13.739735]],
+                                [[12.319424, 16.777205], [16.594296, 21.045748]],
+                            ]
+                        ]
+                    )
+                ),
+            ]
+        )
 
 
 class TestRand3DElastic(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_rand_3d_elastic(self, input_param, input_data, expected_val):
         g = Rand3DElastic(**input_param)
         g.set_random_state(123)
         result = g(**input_data)
-        self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor))
-        if isinstance(result, torch.Tensor):
-            np.testing.assert_allclose(result.cpu().numpy(), expected_val.cpu().numpy(), rtol=1e-4, atol=1e-4)
-        else:
-            np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4)
+        assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_elasticd_2d.py b/tests/test_rand_elasticd_2d.py
index f8eb026088..77e6489d50 100644
--- a/tests/test_rand_elasticd_2d.py
+++ b/tests/test_rand_elasticd_2d.py
@@ -16,127 +16,149 @@
 from parameterized import parameterized
 
 from monai.transforms import Rand2DElasticd
+from tests.utils import TEST_NDARRAYS, assert_allclose, is_tf32_env
 
-TEST_CASES = [
-    [
-        {
-            "keys": ("img", "seg"),
-            "spacing": (0.3, 0.3),
-            "magnitude_range": (1.0, 2.0),
-            "prob": 0.0,
-            "as_tensor_output": False,
-            "device": None,
-            "spatial_size": (2, 2),
-        },
-        {"img": torch.ones((3, 3, 3)), "seg": torch.ones((3, 3, 3))},
-        np.ones((3, 2, 2)),
-    ],
-    [
-        {
-            "keys": ("img", "seg"),
-            "spacing": (0.3, 0.3),
-            "magnitude_range": (0.3, 0.3),
-            "prob": 0.0,
-            "as_tensor_output": False,
-            "device": None,
-            "spatial_size": -1,
-        },
-        {"img": torch.arange(4).reshape((1, 2, 2)), "seg": torch.arange(4).reshape((1, 2, 2))},
-        np.arange(4).reshape((1, 2, 2)),
-    ],
-    [
-        {
-            "keys": ("img", "seg"),
-            "spacing": (0.3, 0.3),
-            "magnitude_range": (1.0, 2.0),
-            "prob": 0.9,
-            "as_tensor_output": False,
-            "padding_mode": "zeros",
-            "device": None,
-            "spatial_size": (2, 2),
-            "mode": "bilinear",
-        },
-        {"img": torch.ones((3, 3, 3)), "seg": torch.ones((3, 3, 3))},
-        np.array(
+_rtol = 5e-3 if is_tf32_env() else 1e-4
+
+TESTS = []
+for p in TEST_NDARRAYS:
+    for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]:
+        TESTS.append(
+            [
+                {
+                    "keys": ("img", "seg"),
+                    "spacing": (0.3, 0.3),
+                    "magnitude_range": (1.0, 2.0),
+                    "prob": 0.0,
+                    "device": device,
+                    "spatial_size": (2, 2),
+                },
+                {"img": p(torch.ones((3, 3, 3))), "seg": p(torch.ones((3, 3, 3)))},
+                p(np.ones((3, 2, 2))),
+            ]
+        )
+        TESTS.append(
+            [
+                {
+                    "keys": ("img", "seg"),
+                    "spacing": (0.3, 0.3),
+                    "magnitude_range": (0.3, 0.3),
+                    "prob": 0.0,
+                    "device": device,
+                    "spatial_size": -1,
+                },
+                {"img": p(torch.arange(4).reshape((1, 2, 2))), "seg": p(torch.arange(4).reshape((1, 2, 2)))},
+                p(np.arange(4).reshape((1, 2, 2))),
+            ]
+        )
+        TESTS.append(
+            [
+                {
+                    "keys": ("img", "seg"),
+                    "spacing": (0.3, 0.3),
+                    "magnitude_range": (1.0, 2.0),
+                    "prob": 0.9,
+                    "padding_mode": "zeros",
+                    "device": device,
+                    "spatial_size": (2, 2),
+                    "mode": "bilinear",
+                },
+                {"img": p(torch.ones((3, 3, 3))), "seg": p(torch.ones((3, 3, 3)))},
+                p(
+                    np.array(
+                        [
+                            [[0.45531988, 0.0], [0.0, 0.71558857]],
+                            [[0.45531988, 0.0], [0.0, 0.71558857]],
+                            [[0.45531988, 0.0], [0.0, 0.71558857]],
+                        ]
+                    )
+                ),
+            ]
+        )
+        TESTS.append(
             [
-                [[0.45531988, 0.0], [0.0, 0.71558857]],
-                [[0.45531988, 0.0], [0.0, 0.71558857]],
-                [[0.45531988, 0.0], [0.0, 0.71558857]],
+                {
+                    "keys": ("img", "seg"),
+                    "spacing": (1.0, 1.0),
+                    "magnitude_range": (1.0, 1.0),
+                    "scale_range": [1.2, 2.2],
+                    "prob": 0.9,
+                    "padding_mode": "border",
+                    "device": device,
+                    "spatial_size": (2, 2),
+                },
+                {"img": p(torch.arange(27).reshape((3, 3, 3))), "seg": p(torch.arange(27).reshape((3, 3, 3)))},
+                p(
+                    torch.tensor(
+                        [
+                            [[3.0793, 2.6141], [4.0568, 5.9978]],
+                            [[12.0793, 11.6141], [13.0568, 14.9978]],
+                            [[21.0793, 20.6141], [22.0568, 23.9978]],
+                        ]
+                    )
+                ),
             ]
-        ),
-    ],
-    [
-        {
-            "keys": ("img", "seg"),
-            "spacing": (1.0, 1.0),
-            "magnitude_range": (1.0, 1.0),
-            "scale_range": [1.2, 2.2],
-            "prob": 0.9,
-            "padding_mode": "border",
-            "as_tensor_output": True,
-            "device": None,
-            "spatial_size": (2, 2),
-        },
-        {"img": torch.arange(27).reshape((3, 3, 3)), "seg": torch.arange(27).reshape((3, 3, 3))},
-        torch.tensor(
+        )
+        TESTS.append(
             [
-                [[3.0793, 2.6141], [4.0568, 5.9978]],
-                [[12.0793, 11.6141], [13.0568, 14.9978]],
-                [[21.0793, 20.6141], [22.0568, 23.9978]],
+                {
+                    "keys": ("img", "seg"),
+                    "spacing": (0.3, 0.3),
+                    "magnitude_range": (0.1, 0.2),
+                    "translate_range": [-0.01, 0.01],
+                    "scale_range": [0.01, 0.02],
+                    "prob": 0.9,
+                    "device": device,
+                    "spatial_size": (2, 2),
+                },
+                {"img": p(torch.arange(27).reshape((3, 3, 3))), "seg": p(torch.arange(27).reshape((3, 3, 3)))},
+                p(
+                    np.array(
+                        [
+                            [[1.3584113, 1.9251312], [5.626623, 6.642721]],
+                            [[10.358411, 10.925131], [14.626623, 15.642721]],
+                            [[19.358412, 19.92513], [23.626623, 24.642721]],
+                        ]
+                    )
+                ),
             ]
-        ),
-    ],
-    [
-        {
-            "keys": ("img", "seg"),
-            "spacing": (0.3, 0.3),
-            "magnitude_range": (0.1, 0.2),
-            "translate_range": [-0.01, 0.01],
-            "scale_range": [0.01, 0.02],
-            "prob": 0.9,
-            "as_tensor_output": False,
-            "device": None,
-            "spatial_size": (2, 2),
-        },
-        {"img": torch.arange(27).reshape((3, 3, 3)), "seg": torch.arange(27).reshape((3, 3, 3))},
-        np.array(
+        )
+        TESTS.append(
             [
-                [[1.3584113, 1.9251312], [5.626623, 6.642721]],
-                [[10.358411, 10.925131], [14.626623, 15.642721]],
-                [[19.358412, 19.92513], [23.626623, 24.642721]],
+                {
+                    "keys": ("img", "seg"),
+                    "mode": ("bilinear", "nearest"),
+                    "spacing": (0.3, 0.3),
+                    "magnitude_range": (0.1, 0.2),
+                    "translate_range": [-0.01, 0.01],
+                    "scale_range": [0.01, 0.02],
+                    "prob": 0.9,
+                    "device": device,
+                    "spatial_size": (2, 2),
+                },
+                {"img": p(torch.arange(27).reshape((3, 3, 3))), "seg": p(torch.arange(27).reshape((3, 3, 3)))},
+                {
+                    "img": p(
+                        torch.tensor(
+                            [
+                                [[1.3584, 1.9251], [5.6266, 6.6427]],
+                                [[10.3584, 10.9251], [14.6266, 15.6427]],
+                                [[19.3584, 19.9251], [23.6266, 24.6427]],
+                            ]
+                        )
+                    ),
+                    "seg": p(
+                        torch.tensor(
+                            [[[0.0, 2.0], [6.0, 8.0]], [[9.0, 11.0], [15.0, 17.0]], [[18.0, 20.0], [24.0, 26.0]]]
+                        )
+                    ),
+                },
             ]
-        ),
-    ],
-    [
-        {
-            "keys": ("img", "seg"),
-            "mode": ("bilinear", "nearest"),
-            "spacing": (0.3, 0.3),
-            "magnitude_range": (0.1, 0.2),
-            "translate_range": [-0.01, 0.01],
-            "scale_range": [0.01, 0.02],
-            "prob": 0.9,
-            "as_tensor_output": True,
-            "device": None,
-            "spatial_size": (2, 2),
-        },
-        {"img": torch.arange(27).reshape((3, 3, 3)), "seg": torch.arange(27).reshape((3, 3, 3))},
-        {
-            "img": torch.tensor(
-                [
-                    [[1.3584, 1.9251], [5.6266, 6.6427]],
-                    [[10.3584, 10.9251], [14.6266, 15.6427]],
-                    [[19.3584, 19.9251], [23.6266, 24.6427]],
-                ]
-            ),
-            "seg": torch.tensor([[[0.0, 2.0], [6.0, 8.0]], [[9.0, 11.0], [15.0, 17.0]], [[18.0, 20.0], [24.0, 26.0]]]),
-        },
-    ],
-]
+        )
 
 
 class TestRand2DElasticd(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_rand_2d_elasticd(self, input_param, input_data, expected_val):
         g = Rand2DElasticd(**input_param)
         g.set_random_state(123)
@@ -144,11 +166,7 @@ def test_rand_2d_elasticd(self, input_param, input_data, expected_val):
         for key in res:
             result = res[key]
             expected = expected_val[key] if isinstance(expected_val, dict) else expected_val
-            self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected, torch.Tensor))
-            if isinstance(result, torch.Tensor):
-                np.testing.assert_allclose(result.cpu().numpy(), expected.cpu().numpy(), rtol=1e-4, atol=1e-4)
-            else:
-                np.testing.assert_allclose(result, expected, rtol=1e-4, atol=1e-4)
+            assert_allclose(result, expected, rtol=_rtol, atol=5e-3)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_elasticd_3d.py b/tests/test_rand_elasticd_3d.py
index 47ab814882..5f8a5f47ed 100644
--- a/tests/test_rand_elasticd_3d.py
+++ b/tests/test_rand_elasticd_3d.py
@@ -16,98 +16,128 @@
 from parameterized import parameterized
 
 from monai.transforms import Rand3DElasticd
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASES = [
-    [
-        {
-            "keys": ("img", "seg"),
-            "magnitude_range": (0.3, 2.3),
-            "sigma_range": (1.0, 20.0),
-            "prob": 0.0,
-            "as_tensor_output": False,
-            "device": None,
-            "spatial_size": (2, 2, 2),
-        },
-        {"img": torch.ones((2, 3, 3, 3)), "seg": torch.ones((2, 3, 3, 3))},
-        np.ones((2, 2, 2, 2)),
-    ],
-    [
-        {
-            "keys": ("img", "seg"),
-            "magnitude_range": (0.3, 2.3),
-            "sigma_range": (1.0, 20.0),
-            "prob": 0.0,
-            "as_tensor_output": False,
-            "device": None,
-            "spatial_size": (2, -1, -1),
-        },
-        {"img": torch.ones((2, 3, 3, 3)), "seg": torch.ones((2, 3, 3, 3))},
-        np.ones((2, 2, 3, 3)),
-    ],
-    [
-        {
-            "keys": ("img", "seg"),
-            "magnitude_range": (0.3, 2.3),
-            "sigma_range": (1.0, 20.0),
-            "prob": 0.0,
-            "as_tensor_output": False,
-            "device": None,
-            "spatial_size": -1,
-        },
-        {"img": torch.arange(8).reshape((1, 2, 2, 2)), "seg": torch.arange(8).reshape((1, 2, 2, 2))},
-        np.arange(8).reshape((1, 2, 2, 2)),
-    ],
-    [
-        {
-            "keys": ("img", "seg"),
-            "magnitude_range": (0.3, 0.3),
-            "sigma_range": (1.0, 2.0),
-            "prob": 0.9,
-            "as_tensor_output": False,
-            "device": None,
-            "spatial_size": (2, 2, 2),
-        },
-        {"img": torch.arange(27).reshape((1, 3, 3, 3)), "seg": torch.arange(27).reshape((1, 3, 3, 3))},
-        np.array([[[[6.4939356, 7.50289], [9.518351, 10.522849]], [[15.512375, 16.523542], [18.531467, 19.53646]]]]),
-    ],
-    [
-        {
-            "keys": ("img", "seg"),
-            "magnitude_range": (0.3, 0.3),
-            "sigma_range": (1.0, 2.0),
-            "prob": 0.9,
-            "rotate_range": [1, 1, 1],
-            "as_tensor_output": False,
-            "device": None,
-            "spatial_size": (2, 2, 2),
-            "mode": "bilinear",
-        },
-        {"img": torch.arange(27).reshape((1, 3, 3, 3)), "seg": torch.arange(27).reshape((1, 3, 3, 3))},
-        np.array([[[[5.0069294, 9.463932], [9.287769, 13.739735]], [[12.319424, 16.777205], [16.594296, 21.045748]]]]),
-    ],
-    [
-        {
-            "keys": ("img", "seg"),
-            "mode": ("bilinear", "nearest"),
-            "magnitude_range": (0.3, 0.3),
-            "sigma_range": (1.0, 2.0),
-            "prob": 0.9,
-            "rotate_range": [1, 1, 1],
-            "as_tensor_output": True,
-            "device": torch.device("cpu:0"),
-            "spatial_size": (2, 2, 2),
-        },
-        {"img": torch.arange(27).reshape((1, 3, 3, 3)), "seg": torch.arange(27).reshape((1, 3, 3, 3))},
-        {
-            "img": torch.tensor([[[[5.0069, 9.4639], [9.2878, 13.7397]], [[12.3194, 16.7772], [16.5943, 21.0457]]]]),
-            "seg": torch.tensor([[[[4.0, 14.0], [7.0, 14.0]], [[9.0, 19.0], [12.0, 22.0]]]]),
-        },
-    ],
-]
+TESTS = []
+for p in TEST_NDARRAYS:
+    for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]:
+        TESTS.append(
+            [
+                {
+                    "keys": ("img", "seg"),
+                    "magnitude_range": (0.3, 2.3),
+                    "sigma_range": (1.0, 20.0),
+                    "prob": 0.0,
+                    "device": device,
+                    "spatial_size": (2, 2, 2),
+                },
+                {"img": p(torch.ones((2, 3, 3, 3))), "seg": p(torch.ones((2, 3, 3, 3)))},
+                p(np.ones((2, 2, 2, 2))),
+            ]
+        )
+        TESTS.append(
+            [
+                {
+                    "keys": ("img", "seg"),
+                    "magnitude_range": (0.3, 2.3),
+                    "sigma_range": (1.0, 20.0),
+                    "prob": 0.0,
+                    "device": device,
+                    "spatial_size": (2, -1, -1),
+                },
+                {"img": p(torch.ones((2, 3, 3, 3))), "seg": p(torch.ones((2, 3, 3, 3)))},
+                p(np.ones((2, 2, 3, 3))),
+            ]
+        )
+        TESTS.append(
+            [
+                {
+                    "keys": ("img", "seg"),
+                    "magnitude_range": (0.3, 2.3),
+                    "sigma_range": (1.0, 20.0),
+                    "prob": 0.0,
+                    "device": device,
+                    "spatial_size": -1,
+                },
+                {"img": p(torch.arange(8).reshape((1, 2, 2, 2))), "seg": p(torch.arange(8).reshape((1, 2, 2, 2)))},
+                p(np.arange(8).reshape((1, 2, 2, 2))),
+            ]
+        )
+        TESTS.append(
+            [
+                {
+                    "keys": ("img", "seg"),
+                    "magnitude_range": (0.3, 0.3),
+                    "sigma_range": (1.0, 2.0),
+                    "prob": 0.9,
+                    "device": device,
+                    "spatial_size": (2, 2, 2),
+                },
+                {"img": p(torch.arange(27).reshape((1, 3, 3, 3))), "seg": p(torch.arange(27).reshape((1, 3, 3, 3)))},
+                p(
+                    np.array(
+                        [
+                            [
+                                [[6.4939356, 7.50289], [9.518351, 10.522849]],
+                                [[15.512375, 16.523542], [18.531467, 19.53646]],
+                            ]
+                        ]
+                    )
+                ),
+            ]
+        )
+        TESTS.append(
+            [
+                {
+                    "keys": ("img", "seg"),
+                    "magnitude_range": (0.3, 0.3),
+                    "sigma_range": (1.0, 2.0),
+                    "prob": 0.9,
+                    "rotate_range": [1, 1, 1],
+                    "device": device,
+                    "spatial_size": (2, 2, 2),
+                    "mode": "bilinear",
+                },
+                {"img": p(torch.arange(27).reshape((1, 3, 3, 3))), "seg": p(torch.arange(27).reshape((1, 3, 3, 3)))},
+                p(
+                    np.array(
+                        [
+                            [
+                                [[5.0069294, 9.463932], [9.287769, 13.739735]],
+                                [[12.319424, 16.777205], [16.594296, 21.045748]],
+                            ]
+                        ]
+                    )
+                ),
+            ]
+        )
+        TESTS.append(
+            [
+                {
+                    "keys": ("img", "seg"),
+                    "mode": ("bilinear", "nearest"),
+                    "magnitude_range": (0.3, 0.3),
+                    "sigma_range": (1.0, 2.0),
+                    "prob": 0.9,
+                    "rotate_range": [1, 1, 1],
+                    "device": device,
+                    "spatial_size": (2, 2, 2),
+                },
+                {"img": p(torch.arange(27).reshape((1, 3, 3, 3))), "seg": p(torch.arange(27).reshape((1, 3, 3, 3)))},
+                {
+                    "img": p(
+                        torch.tensor(
+                            [[[[5.0069, 9.4639], [9.2878, 13.7397]], [[12.3194, 16.7772], [16.5943, 21.0457]]]]
+                        )
+                    ),
+                    "seg": p(torch.tensor([[[[4.0, 14.0], [7.0, 14.0]], [[9.0, 19.0], [12.0, 22.0]]]])),
+                },
+            ]
+        )
 
 
 class TestRand3DElasticd(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_rand_3d_elasticd(self, input_param, input_data, expected_val):
         g = Rand3DElasticd(**input_param)
         g.set_random_state(123)
@@ -115,11 +145,7 @@ def test_rand_3d_elasticd(self, input_param, input_data, expected_val):
         for key in res:
             result = res[key]
             expected = expected_val[key] if isinstance(expected_val, dict) else expected_val
-            self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected, torch.Tensor))
-            if isinstance(result, torch.Tensor):
-                np.testing.assert_allclose(result.cpu().numpy(), expected.cpu().numpy(), rtol=1e-4, atol=1e-4)
-            else:
-                np.testing.assert_allclose(result, expected, rtol=1e-4, atol=1e-4)
+            assert_allclose(result, expected, rtol=1e-4, atol=1e-4)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_flip.py b/tests/test_rand_flip.py
index b3c514cb1f..df49d60861 100644
--- a/tests/test_rand_flip.py
+++ b/tests/test_rand_flip.py
@@ -34,12 +34,10 @@ def test_correct_results(self, _, spatial_axis):
         for p in TEST_NDARRAYS:
             im = p(self.imt[0])
             flip = RandFlip(prob=1.0, spatial_axis=spatial_axis)
-            expected = []
-            for channel in self.imt[0]:
-                expected.append(np.flip(channel, spatial_axis))
+            expected = [np.flip(channel, spatial_axis) for channel in self.imt[0]]
             expected = np.stack(expected)
             result = flip(im)
-            assert_allclose(expected, result)
+            assert_allclose(result, p(expected))
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_flipd.py b/tests/test_rand_flipd.py
index 8972024fd8..c2869537cb 100644
--- a/tests/test_rand_flipd.py
+++ b/tests/test_rand_flipd.py
@@ -26,11 +26,9 @@ def test_correct_results(self, _, spatial_axis):
         for p in TEST_NDARRAYS:
             flip = RandFlipd(keys="img", prob=1.0, spatial_axis=spatial_axis)
             result = flip({"img": p(self.imt[0])})["img"]
-            expected = []
-            for channel in self.imt[0]:
-                expected.append(np.flip(channel, spatial_axis))
+            expected = [np.flip(channel, spatial_axis) for channel in self.imt[0]]
             expected = np.stack(expected)
-            assert_allclose(expected, result)
+            assert_allclose(result, p(expected))
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_gaussian_sharpen.py b/tests/test_rand_gaussian_sharpen.py
index 909f96f56b..4804fc2422 100644
--- a/tests/test_rand_gaussian_sharpen.py
+++ b/tests/test_rand_gaussian_sharpen.py
@@ -11,88 +11,127 @@
 
 import unittest
 
-import numpy as np
 from parameterized import parameterized
 
 from monai.transforms import RandGaussianSharpen
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [
-    {"prob": 1.0},
-    np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
-    np.array(
+TESTS = []
+
+for p in TEST_NDARRAYS:
+    TESTS.append(
         [
-            [[5.2919216, 5.5854445, 5.29192], [11.3982, 12.62332, 11.398202], [14.870525, 17.323769, 14.870527]],
-            [[20.413757, 22.767355, 20.413757], [28.495504, 31.558315, 28.495499], [29.99236, 34.505676, 29.992361]],
+            {"prob": 1.0},
+            p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
+            p(
+                [
+                    [
+                        [5.2919216, 5.5854445, 5.29192],
+                        [11.3982, 12.62332, 11.398202],
+                        [14.870525, 17.323769, 14.870527],
+                    ],
+                    [
+                        [20.413757, 22.767355, 20.413757],
+                        [28.495504, 31.558315, 28.495499],
+                        [29.99236, 34.505676, 29.992361],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_2 = [
-    {
-        "sigma1_x": (0.5, 0.75),
-        "sigma1_y": (0.5, 0.75),
-        "sigma1_z": (0.5, 0.75),
-        "sigma2_x": 0.4,
-        "sigma2_y": 0.4,
-        "sigma2_z": 0.4,
-        "prob": 1.0,
-    },
-    np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
-    np.array(
+    TESTS.append(
         [
-            [[4.1071496, 3.597953, 4.1071477], [10.062014, 9.825114, 10.0620165], [14.698058, 15.818766, 14.698058]],
-            [[18.211048, 18.16049, 18.211048], [25.155039, 24.56279, 25.155039], [28.801964, 30.381308, 28.801964]],
+            {
+                "sigma1_x": (0.5, 0.75),
+                "sigma1_y": (0.5, 0.75),
+                "sigma1_z": (0.5, 0.75),
+                "sigma2_x": 0.4,
+                "sigma2_y": 0.4,
+                "sigma2_z": 0.4,
+                "prob": 1.0,
+            },
+            p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
+            p(
+                [
+                    [
+                        [4.1071496, 3.597953, 4.1071477],
+                        [10.062014, 9.825114, 10.0620165],
+                        [14.698058, 15.818766, 14.698058],
+                    ],
+                    [
+                        [18.211048, 18.16049, 18.211048],
+                        [25.155039, 24.56279, 25.155039],
+                        [28.801964, 30.381308, 28.801964],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_3 = [
-    {
-        "sigma1_x": (0.5, 0.75),
-        "sigma1_y": (0.5, 0.75),
-        "sigma1_z": (0.5, 0.75),
-        "sigma2_x": (0.5, 0.75),
-        "sigma2_y": (0.5, 0.75),
-        "sigma2_z": (0.5, 0.75),
-        "prob": 1.0,
-    },
-    np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
-    np.array(
+    TESTS.append(
         [
-            [[4.81077, 4.4237204, 4.81077], [12.061236, 12.298177, 12.061236], [17.362553, 19.201174, 17.362553]],
-            [[21.440754, 22.142393, 21.440754], [30.15308, 30.745445, 30.153086], [33.99255, 36.919838, 33.99255]],
+            {
+                "sigma1_x": (0.5, 0.75),
+                "sigma1_y": (0.5, 0.75),
+                "sigma1_z": (0.5, 0.75),
+                "sigma2_x": (0.5, 0.75),
+                "sigma2_y": (0.5, 0.75),
+                "sigma2_z": (0.5, 0.75),
+                "prob": 1.0,
+            },
+            p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
+            p(
+                [
+                    [
+                        [4.81077, 4.4237204, 4.81077],
+                        [12.061236, 12.298177, 12.061236],
+                        [17.362553, 19.201174, 17.362553],
+                    ],
+                    [
+                        [21.440754, 22.142393, 21.440754],
+                        [30.15308, 30.745445, 30.153086],
+                        [33.99255, 36.919838, 33.99255],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_4 = [
-    {
-        "sigma1_x": (0.5, 0.75),
-        "sigma1_y": (0.5, 0.75),
-        "sigma1_z": (0.5, 0.75),
-        "sigma2_x": (0.5, 0.75),
-        "sigma2_y": (0.5, 0.75),
-        "sigma2_z": (0.5, 0.75),
-        "approx": "scalespace",
-        "prob": 1.0,
-    },
-    np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
-    np.array(
+    TESTS.append(
         [
-            [[4.430213, 3.2278745, 4.4302144], [10.325399, 8.507457, 10.325399], [17.494898, 16.5609, 17.494894]],
-            [[20.87405, 18.06946, 20.87405], [25.813503, 21.268656, 25.8135], [33.93874, 31.402481, 33.938725]],
+            {
+                "sigma1_x": (0.5, 0.75),
+                "sigma1_y": (0.5, 0.75),
+                "sigma1_z": (0.5, 0.75),
+                "sigma2_x": (0.5, 0.75),
+                "sigma2_y": (0.5, 0.75),
+                "sigma2_z": (0.5, 0.75),
+                "approx": "scalespace",
+                "prob": 1.0,
+            },
+            p([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
+            p(
+                [
+                    [
+                        [4.430213, 3.2278745, 4.4302144],
+                        [10.325399, 8.507457, 10.325399],
+                        [17.494898, 16.5609, 17.494894],
+                    ],
+                    [[20.87405, 18.06946, 20.87405], [25.813503, 21.268656, 25.8135], [33.93874, 31.402481, 33.938725]],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
 
 class TestRandGaussianSharpen(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4])
+    @parameterized.expand(TESTS)
     def test_value(self, argments, image, expected_data):
         converter = RandGaussianSharpen(**argments)
         converter.set_random_state(seed=0)
         result = converter(image)
-        np.testing.assert_allclose(result, expected_data, rtol=1e-4)
+        assert_allclose(result, expected_data, atol=0, rtol=1e-4, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_gaussian_sharpend.py b/tests/test_rand_gaussian_sharpend.py
index 9ba29ee71b..3508ebaa19 100644
--- a/tests/test_rand_gaussian_sharpend.py
+++ b/tests/test_rand_gaussian_sharpend.py
@@ -15,87 +15,126 @@
 from parameterized import parameterized
 
 from monai.transforms import RandGaussianSharpend
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [
-    {"keys": "img", "prob": 1.0},
-    {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])},
-    np.array(
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
         [
-            [[5.2919216, 5.5854445, 5.29192], [11.3982, 12.62332, 11.398202], [14.870525, 17.323769, 14.870527]],
-            [[20.413757, 22.767355, 20.413757], [28.495504, 31.558315, 28.495499], [29.99236, 34.505676, 29.992361]],
+            {"keys": "img", "prob": 1.0},
+            {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))},
+            np.array(
+                [
+                    [
+                        [5.2919216, 5.5854445, 5.29192],
+                        [11.3982, 12.62332, 11.398202],
+                        [14.870525, 17.323769, 14.870527],
+                    ],
+                    [
+                        [20.413757, 22.767355, 20.413757],
+                        [28.495504, 31.558315, 28.495499],
+                        [29.99236, 34.505676, 29.992361],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_2 = [
-    {
-        "keys": "img",
-        "sigma1_x": (0.5, 0.75),
-        "sigma1_y": (0.5, 0.75),
-        "sigma1_z": (0.5, 0.75),
-        "sigma2_x": 0.4,
-        "sigma2_y": 0.4,
-        "sigma2_z": 0.4,
-        "prob": 1.0,
-    },
-    {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])},
-    np.array(
+    TESTS.append(
         [
-            [[4.1071496, 3.597953, 4.1071477], [10.062014, 9.825114, 10.0620165], [14.698058, 15.818766, 14.698058]],
-            [[18.211048, 18.16049, 18.211048], [25.155039, 24.56279, 25.155039], [28.801964, 30.381308, 28.801964]],
+            {
+                "keys": "img",
+                "sigma1_x": (0.5, 0.75),
+                "sigma1_y": (0.5, 0.75),
+                "sigma1_z": (0.5, 0.75),
+                "sigma2_x": 0.4,
+                "sigma2_y": 0.4,
+                "sigma2_z": 0.4,
+                "prob": 1.0,
+            },
+            {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))},
+            np.array(
+                [
+                    [
+                        [4.1071496, 3.597953, 4.1071477],
+                        [10.062014, 9.825114, 10.0620165],
+                        [14.698058, 15.818766, 14.698058],
+                    ],
+                    [
+                        [18.211048, 18.16049, 18.211048],
+                        [25.155039, 24.56279, 25.155039],
+                        [28.801964, 30.381308, 28.801964],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_3 = [
-    {
-        "keys": "img",
-        "sigma1_x": (0.5, 0.75),
-        "sigma1_y": (0.5, 0.75),
-        "sigma1_z": (0.5, 0.75),
-        "sigma2_x": (0.5, 0.75),
-        "sigma2_y": (0.5, 0.75),
-        "sigma2_z": (0.5, 0.75),
-        "prob": 1.0,
-    },
-    {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])},
-    np.array(
+    TESTS.append(
         [
-            [[4.81077, 4.4237204, 4.81077], [12.061236, 12.298177, 12.061236], [17.362553, 19.201174, 17.362553]],
-            [[21.440754, 22.142393, 21.440754], [30.15308, 30.745445, 30.153086], [33.99255, 36.919838, 33.99255]],
+            {
+                "keys": "img",
+                "sigma1_x": (0.5, 0.75),
+                "sigma1_y": (0.5, 0.75),
+                "sigma1_z": (0.5, 0.75),
+                "sigma2_x": (0.5, 0.75),
+                "sigma2_y": (0.5, 0.75),
+                "sigma2_z": (0.5, 0.75),
+                "prob": 1.0,
+            },
+            {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))},
+            np.array(
+                [
+                    [
+                        [4.81077, 4.4237204, 4.81077],
+                        [12.061236, 12.298177, 12.061236],
+                        [17.362553, 19.201174, 17.362553],
+                    ],
+                    [
+                        [21.440754, 22.142393, 21.440754],
+                        [30.15308, 30.745445, 30.153086],
+                        [33.99255, 36.919838, 33.99255],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_4 = [
-    {
-        "keys": "img",
-        "sigma1_x": (0.5, 0.75),
-        "sigma1_y": (0.5, 0.75),
-        "sigma1_z": (0.5, 0.75),
-        "sigma2_x": (0.5, 0.75),
-        "sigma2_y": (0.5, 0.75),
-        "sigma2_z": (0.5, 0.75),
-        "approx": "scalespace",
-        "prob": 1.0,
-    },
-    {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])},
-    np.array(
+    TESTS.append(
         [
-            [[4.430213, 3.2278745, 4.4302144], [10.325399, 8.507457, 10.325399], [17.494898, 16.5609, 17.494894]],
-            [[20.87405, 18.06946, 20.87405], [25.813503, 21.268656, 25.8135], [33.93874, 31.402481, 33.938725]],
+            {
+                "keys": "img",
+                "sigma1_x": (0.5, 0.75),
+                "sigma1_y": (0.5, 0.75),
+                "sigma1_z": (0.5, 0.75),
+                "sigma2_x": (0.5, 0.75),
+                "sigma2_y": (0.5, 0.75),
+                "sigma2_z": (0.5, 0.75),
+                "approx": "scalespace",
+                "prob": 1.0,
+            },
+            {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))},
+            np.array(
+                [
+                    [
+                        [4.430213, 3.2278745, 4.4302144],
+                        [10.325399, 8.507457, 10.325399],
+                        [17.494898, 16.5609, 17.494894],
+                    ],
+                    [[20.87405, 18.06946, 20.87405], [25.813503, 21.268656, 25.8135], [33.93874, 31.402481, 33.938725]],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
 
 class TestRandGaussianSharpend(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_4])
+    @parameterized.expand(TESTS)
     def test_value(self, argments, image, expected_data):
         converter = RandGaussianSharpend(**argments)
         converter.set_random_state(seed=0)
         result = converter(image)
-        np.testing.assert_allclose(result["img"], expected_data, rtol=1e-4)
+        assert_allclose(result["img"], expected_data, rtol=1e-4, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_gaussian_smooth.py b/tests/test_rand_gaussian_smooth.py
index 889ed7d6d5..b4d4304b67 100644
--- a/tests/test_rand_gaussian_smooth.py
+++ b/tests/test_rand_gaussian_smooth.py
@@ -15,48 +15,81 @@
 from parameterized import parameterized
 
 from monai.transforms import RandGaussianSmooth
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [
-    {"sigma_x": (0.5, 1.5), "prob": 1.0},
-    np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
-    np.array(
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
         [
-            [[0.71806467, 0.9074683, 0.71806467], [1.0718315, 1.3545481, 1.0718315], [1.0337002, 1.306359, 1.0337002]],
-            [[2.0318885, 2.5678391, 2.0318885], [2.6795788, 3.3863702, 2.6795788], [2.3475242, 2.9667296, 2.3475242]],
+            {"sigma_x": (0.5, 1.5), "prob": 1.0},
+            p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])),
+            np.array(
+                [
+                    [
+                        [0.71806467, 0.9074683, 0.71806467],
+                        [1.0718315, 1.3545481, 1.0718315],
+                        [1.0337002, 1.306359, 1.0337002],
+                    ],
+                    [
+                        [2.0318885, 2.5678391, 2.0318885],
+                        [2.6795788, 3.3863702, 2.6795788],
+                        [2.3475242, 2.9667296, 2.3475242],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_2 = [
-    {"sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "prob": 1.0},
-    np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
-    np.array(
+    TESTS.append(
         [
-            [[0.7686928, 0.9848021, 0.7686928], [1.1474025, 1.4699818, 1.1474024], [1.1065826, 1.4176859, 1.1065826]],
-            [[2.1751494, 2.7866683, 2.1751497], [2.8685062, 3.6749542, 2.8685062], [2.5130394, 3.219552, 2.5130394]],
+            {"sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "prob": 1.0},
+            p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])),
+            np.array(
+                [
+                    [
+                        [0.7686928, 0.9848021, 0.7686928],
+                        [1.1474025, 1.4699818, 1.1474024],
+                        [1.1065826, 1.4176859, 1.1065826],
+                    ],
+                    [
+                        [2.1751494, 2.7866683, 2.1751497],
+                        [2.8685062, 3.6749542, 2.8685062],
+                        [2.5130394, 3.219552, 2.5130394],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_3 = [
-    {"sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "approx": "scalespace", "prob": 1.0},
-    np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]),
-    np.array(
+    TESTS.append(
         [
-            [[0.8128456, 0.96736777, 0.8128456], [1.2742369, 1.5164697, 1.2742369], [1.2800367, 1.5233722, 1.2800368]],
-            [[2.3825073, 2.8354228, 2.3825073], [3.1855922, 3.7911744, 3.1855922], [2.8496985, 3.391427, 2.8496985]],
+            {"sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "approx": "scalespace", "prob": 1.0},
+            p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])),
+            np.array(
+                [
+                    [
+                        [0.8128456, 0.96736777, 0.8128456],
+                        [1.2742369, 1.5164697, 1.2742369],
+                        [1.2800367, 1.5233722, 1.2800368],
+                    ],
+                    [
+                        [2.3825073, 2.8354228, 2.3825073],
+                        [3.1855922, 3.7911744, 3.1855922],
+                        [2.8496985, 3.391427, 2.8496985],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
 
 class TestRandGaussianSmooth(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
+    @parameterized.expand(TESTS)
     def test_value(self, argments, image, expected_data):
         converter = RandGaussianSmooth(**argments)
         converter.set_random_state(seed=0)
         result = converter(image)
-        np.testing.assert_allclose(result, expected_data, rtol=1e-4)
+        assert_allclose(result, expected_data, rtol=1e-4, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_gaussian_smoothd.py b/tests/test_rand_gaussian_smoothd.py
index 2eedc9071c..2c80b978f2 100644
--- a/tests/test_rand_gaussian_smoothd.py
+++ b/tests/test_rand_gaussian_smoothd.py
@@ -15,48 +15,81 @@
 from parameterized import parameterized
 
 from monai.transforms import RandGaussianSmoothd
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [
-    {"keys": "img", "sigma_x": (0.5, 1.5), "prob": 1.0},
-    {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])},
-    np.array(
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
         [
-            [[0.71806467, 0.9074683, 0.71806467], [1.0718315, 1.3545481, 1.0718315], [1.0337002, 1.306359, 1.0337002]],
-            [[2.0318885, 2.5678391, 2.0318885], [2.6795788, 3.3863702, 2.6795788], [2.3475242, 2.9667296, 2.3475242]],
+            {"keys": "img", "sigma_x": (0.5, 1.5), "prob": 1.0},
+            {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))},
+            np.array(
+                [
+                    [
+                        [0.71806467, 0.9074683, 0.71806467],
+                        [1.0718315, 1.3545481, 1.0718315],
+                        [1.0337002, 1.306359, 1.0337002],
+                    ],
+                    [
+                        [2.0318885, 2.5678391, 2.0318885],
+                        [2.6795788, 3.3863702, 2.6795788],
+                        [2.3475242, 2.9667296, 2.3475242],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_2 = [
-    {"keys": "img", "sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "prob": 1.0},
-    {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])},
-    np.array(
+    TESTS.append(
         [
-            [[0.7686928, 0.9848021, 0.7686928], [1.1474025, 1.4699818, 1.1474024], [1.1065826, 1.4176859, 1.1065826]],
-            [[2.1751494, 2.7866683, 2.1751497], [2.8685062, 3.6749542, 2.8685062], [2.5130394, 3.219552, 2.5130394]],
+            {"keys": "img", "sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "prob": 1.0},
+            {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))},
+            np.array(
+                [
+                    [
+                        [0.7686928, 0.9848021, 0.7686928],
+                        [1.1474025, 1.4699818, 1.1474024],
+                        [1.1065826, 1.4176859, 1.1065826],
+                    ],
+                    [
+                        [2.1751494, 2.7866683, 2.1751497],
+                        [2.8685062, 3.6749542, 2.8685062],
+                        [2.5130394, 3.219552, 2.5130394],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
-TEST_CASE_3 = [
-    {"keys": "img", "sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "approx": "scalespace", "prob": 1.0},
-    {"img": np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]])},
-    np.array(
+    TESTS.append(
         [
-            [[0.8128456, 0.96736777, 0.8128456], [1.2742369, 1.5164697, 1.2742369], [1.2800367, 1.5233722, 1.2800368]],
-            [[2.3825073, 2.8354228, 2.3825073], [3.1855922, 3.7911744, 3.1855922], [2.8496985, 3.391427, 2.8496985]],
+            {"keys": "img", "sigma_x": (0.5, 1.5), "sigma_y": (0.5, 1.0), "approx": "scalespace", "prob": 1.0},
+            {"img": p(np.array([[[1, 1, 1], [2, 2, 2], [3, 3, 3]], [[4, 4, 4], [5, 5, 5], [6, 6, 6]]]))},
+            np.array(
+                [
+                    [
+                        [0.8128456, 0.96736777, 0.8128456],
+                        [1.2742369, 1.5164697, 1.2742369],
+                        [1.2800367, 1.5233722, 1.2800368],
+                    ],
+                    [
+                        [2.3825073, 2.8354228, 2.3825073],
+                        [3.1855922, 3.7911744, 3.1855922],
+                        [2.8496985, 3.391427, 2.8496985],
+                    ],
+                ]
+            ),
         ]
-    ),
-]
+    )
 
 
 class TestRandGaussianSmoothd(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
+    @parameterized.expand(TESTS)
     def test_value(self, argments, image, expected_data):
         converter = RandGaussianSmoothd(**argments)
         converter.set_random_state(seed=0)
         result = converter(image)
-        np.testing.assert_allclose(result["img"], expected_data, rtol=1e-4)
+        assert_allclose(result["img"], expected_data, rtol=1e-4, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_gibbs_noise.py b/tests/test_rand_gibbs_noise.py
index a0701d09c3..15cadea0e2 100644
--- a/tests/test_rand_gibbs_noise.py
+++ b/tests/test_rand_gibbs_noise.py
@@ -19,17 +19,17 @@
 from monai.data.synthetic import create_test_image_2d, create_test_image_3d
 from monai.transforms import RandGibbsNoise
 from monai.utils.misc import set_determinism
-from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule
+from monai.utils.module import optional_import
+from tests.utils import TEST_NDARRAYS
+
+_, has_torch_fft = optional_import("torch.fft", name="fftshift")
 
 TEST_CASES = []
 for shape in ((128, 64), (64, 48, 80)):
-    for as_tensor_output in (True, False):
-        for as_tensor_input in (True, False):
-            TEST_CASES.append((shape, as_tensor_output, as_tensor_input))
+    for input_type in TEST_NDARRAYS if has_torch_fft else [np.array]:
+        TEST_CASES.append((shape, input_type))
 
 
-@SkipIfBeforePyTorchVersion((1, 8))
-@SkipIfNoModule("torch.fft")
 class TestRandGibbsNoise(unittest.TestCase):
     def setUp(self):
         set_determinism(0)
@@ -39,50 +39,50 @@ def tearDown(self):
         set_determinism(None)
 
     @staticmethod
-    def get_data(im_shape, as_tensor_input):
+    def get_data(im_shape, input_type):
         create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d
         im = create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5)[0][None]
-        return torch.Tensor(im) if as_tensor_input else im
+        return input_type(im)
 
     @parameterized.expand(TEST_CASES)
-    def test_0_prob(self, im_shape, as_tensor_output, as_tensor_input):
-        im = self.get_data(im_shape, as_tensor_input)
+    def test_0_prob(self, im_shape, input_type):
+        im = self.get_data(im_shape, input_type)
         alpha = [0.5, 1.0]
-        t = RandGibbsNoise(0.0, alpha, as_tensor_output)
+        t = RandGibbsNoise(0.0, alpha)
         out = t(im)
-        np.testing.assert_allclose(im, out)
+        torch.testing.assert_allclose(im, out, rtol=1e-7, atol=0)
 
     @parameterized.expand(TEST_CASES)
-    def test_same_result(self, im_shape, as_tensor_output, as_tensor_input):
-        im = self.get_data(im_shape, as_tensor_input)
+    def test_same_result(self, im_shape, input_type):
+        im = self.get_data(im_shape, input_type)
         alpha = [0.5, 0.8]
-        t = RandGibbsNoise(1.0, alpha, as_tensor_output)
+        t = RandGibbsNoise(1.0, alpha)
         t.set_random_state(42)
         out1 = t(deepcopy(im))
         t.set_random_state(42)
         out2 = t(deepcopy(im))
-        np.testing.assert_allclose(out1, out2)
-        self.assertIsInstance(out1, torch.Tensor if as_tensor_output else np.ndarray)
+        torch.testing.assert_allclose(out1, out2, rtol=1e-7, atol=0)
+        self.assertIsInstance(out1, type(im))
 
     @parameterized.expand(TEST_CASES)
-    def test_identity(self, im_shape, _, as_tensor_input):
-        im = self.get_data(im_shape, as_tensor_input)
+    def test_identity(self, im_shape, input_type):
+        im = self.get_data(im_shape, input_type)
         alpha = [0.0, 0.0]
         t = RandGibbsNoise(1.0, alpha)
         out = t(deepcopy(im))
-        np.testing.assert_allclose(im, out, atol=1e-2)
+        torch.testing.assert_allclose(im, out, atol=1e-2, rtol=1e-7)
 
     @parameterized.expand(TEST_CASES)
-    def test_alpha_1(self, im_shape, _, as_tensor_input):
-        im = self.get_data(im_shape, as_tensor_input)
+    def test_alpha_1(self, im_shape, input_type):
+        im = self.get_data(im_shape, input_type)
         alpha = [1.0, 1.0]
         t = RandGibbsNoise(1.0, alpha)
         out = t(deepcopy(im))
-        np.testing.assert_allclose(0 * im, out)
+        torch.testing.assert_allclose(0 * im, out, rtol=1e-7, atol=0)
 
     @parameterized.expand(TEST_CASES)
-    def test_alpha(self, im_shape, _, as_tensor_input):
-        im = self.get_data(im_shape, as_tensor_input)
+    def test_alpha(self, im_shape, input_type):
+        im = self.get_data(im_shape, input_type)
         alpha = [0.5, 0.51]
         t = RandGibbsNoise(1.0, alpha)
         _ = t(deepcopy(im))
diff --git a/tests/test_rand_gibbs_noised.py b/tests/test_rand_gibbs_noised.py
index b778bffdda..b8bac67b81 100644
--- a/tests/test_rand_gibbs_noised.py
+++ b/tests/test_rand_gibbs_noised.py
@@ -19,19 +19,19 @@
 from monai.data.synthetic import create_test_image_2d, create_test_image_3d
 from monai.transforms import RandGibbsNoised
 from monai.utils.misc import set_determinism
-from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule
+from monai.utils.module import optional_import
+from tests.utils import TEST_NDARRAYS
+
+_, has_torch_fft = optional_import("torch.fft", name="fftshift")
 
 TEST_CASES = []
 for shape in ((128, 64), (64, 48, 80)):
-    for as_tensor_output in (True, False):
-        for as_tensor_input in (True, False):
-            TEST_CASES.append((shape, as_tensor_output, as_tensor_input))
+    for input_type in TEST_NDARRAYS if has_torch_fft else [np.array]:
+        TEST_CASES.append((shape, input_type))
 
 KEYS = ["im", "label"]
 
 
-@SkipIfBeforePyTorchVersion((1, 8))
-@SkipIfNoModule("torch.fft")
 class TestRandGibbsNoised(unittest.TestCase):
     def setUp(self):
         set_determinism(0)
@@ -41,70 +41,76 @@ def tearDown(self):
         set_determinism(None)
 
     @staticmethod
-    def get_data(im_shape, as_tensor_input):
+    def get_data(im_shape, input_type):
         create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d
         ims = create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5)
-        ims = [torch.Tensor(im) for im in ims] if as_tensor_input else ims
-        return dict(zip(KEYS, ims))
+        return {k: input_type(v) for k, v in zip(KEYS, ims)}
 
     @parameterized.expand(TEST_CASES)
-    def test_0_prob(self, im_shape, as_tensor_output, as_tensor_input):
-        data = self.get_data(im_shape, as_tensor_input)
+    def test_0_prob(self, im_shape, input_type):
+        data = self.get_data(im_shape, input_type)
         alpha = [0.5, 1.0]
-        t = RandGibbsNoised(KEYS, 0.0, alpha, as_tensor_output)
+        t = RandGibbsNoised(KEYS, 0.0, alpha)
         out = t(data)
         for k in KEYS:
-            np.testing.assert_allclose(data[k], out[k])
+            torch.testing.assert_allclose(data[k], out[k], rtol=1e-7, atol=0)
 
     @parameterized.expand(TEST_CASES)
-    def test_same_result(self, im_shape, as_tensor_output, as_tensor_input):
-        data = self.get_data(im_shape, as_tensor_input)
+    def test_same_result(self, im_shape, input_type):
+        data = self.get_data(im_shape, input_type)
         alpha = [0.5, 0.8]
-        t = RandGibbsNoised(KEYS, 1.0, alpha, as_tensor_output)
+        t = RandGibbsNoised(KEYS, 1.0, alpha)
         t.set_random_state(42)
         out1 = t(deepcopy(data))
         t.set_random_state(42)
         out2 = t(deepcopy(data))
         for k in KEYS:
-            np.testing.assert_allclose(out1[k], out2[k])
-            self.assertIsInstance(out1[k], torch.Tensor if as_tensor_output else np.ndarray)
+            torch.testing.assert_allclose(out1[k], out2[k], rtol=1e-7, atol=0)
+            self.assertIsInstance(out1[k], type(data[k]))
 
     @parameterized.expand(TEST_CASES)
-    def test_identity(self, im_shape, _, as_tensor_input):
-        data = self.get_data(im_shape, as_tensor_input)
+    def test_identity(self, im_shape, input_type):
+        data = self.get_data(im_shape, input_type)
         alpha = [0.0, 0.0]
         t = RandGibbsNoised(KEYS, 1.0, alpha)
         out = t(deepcopy(data))
         for k in KEYS:
+            self.assertEqual(type(out[k]), type(data[k]))
+            if isinstance(out[k], torch.Tensor):
+                self.assertEqual(out[k].device, data[k].device)
+                out[k], data[k] = out[k].cpu(), data[k].cpu()
             np.testing.assert_allclose(data[k], out[k], atol=1e-2)
 
     @parameterized.expand(TEST_CASES)
-    def test_alpha_1(self, im_shape, _, as_tensor_input):
-        data = self.get_data(im_shape, as_tensor_input)
+    def test_alpha_1(self, im_shape, input_type):
+        data = self.get_data(im_shape, input_type)
         alpha = [1.0, 1.0]
         t = RandGibbsNoised(KEYS, 1.0, alpha)
         out = t(deepcopy(data))
         for k in KEYS:
-            np.testing.assert_allclose(0 * data[k], out[k])
+            self.assertEqual(type(out[k]), type(data[k]))
+            if isinstance(out[k], torch.Tensor):
+                self.assertEqual(out[k].device, data[k].device)
+                out[k], data[k] = out[k].cpu(), data[k].cpu()
+            np.testing.assert_allclose(0.0 * data[k], out[k], atol=1e-2)
 
     @parameterized.expand(TEST_CASES)
-    def test_dict_matches(self, im_shape, _, as_tensor_input):
-        data = self.get_data(im_shape, as_tensor_input)
+    def test_dict_matches(self, im_shape, input_type):
+        data = self.get_data(im_shape, input_type)
         # use same image for both dictionary entries to check same trans is applied to them
         data = {KEYS[0]: deepcopy(data[KEYS[0]]), KEYS[1]: deepcopy(data[KEYS[0]])}
         alpha = [0.5, 1.0]
         t = RandGibbsNoised(KEYS, 1.0, alpha)
         out = t(deepcopy(data))
-        np.testing.assert_allclose(out[KEYS[0]], out[KEYS[1]])
+        torch.testing.assert_allclose(out[KEYS[0]], out[KEYS[1]], rtol=1e-7, atol=0)
 
     @parameterized.expand(TEST_CASES)
-    def test_alpha(self, im_shape, _, as_tensor_input):
-        data = self.get_data(im_shape, as_tensor_input)
+    def test_alpha(self, im_shape, input_type):
+        data = self.get_data(im_shape, input_type)
         alpha = [0.5, 0.51]
         t = RandGibbsNoised(KEYS, 1.0, alpha)
         _ = t(deepcopy(data))
-        self.assertGreaterEqual(t.sampled_alpha, 0.5)
-        self.assertLessEqual(t.sampled_alpha, 0.51)
+        self.assertTrue(0.5 <= t.sampled_alpha <= 0.51)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_histogram_shift.py b/tests/test_rand_histogram_shift.py
index b258cc5a7e..fa51dacefa 100644
--- a/tests/test_rand_histogram_shift.py
+++ b/tests/test_rand_histogram_shift.py
@@ -15,28 +15,35 @@
 from parameterized import parameterized
 
 from monai.transforms import RandHistogramShift
-
-TEST_CASES = [
-    [
-        {"num_control_points": 5, "prob": 0.0},
-        {"img": np.arange(8).reshape((1, 2, 2, 2))},
-        np.arange(8).reshape((1, 2, 2, 2)),
-    ],
-    [
-        {"num_control_points": 5, "prob": 0.9},
-        {"img": np.arange(8).reshape((1, 2, 2, 2)).astype(np.float32)},
-        np.array([[[[0.0, 0.57227867], [1.1391707, 1.68990281]], [[2.75833219, 4.34445884], [5.70913743, 7.0]]]]),
-    ],
-    [
-        {"num_control_points": (5, 20), "prob": 0.9},
-        {"img": np.arange(8).reshape((1, 2, 2, 2)).astype(np.float32)},
-        np.array([[[[0.0, 1.17472492], [2.21553091, 2.88292011]], [[3.98407301, 5.01302123], [6.09275004, 7.0]]]]),
-    ],
-]
+from tests.utils import TEST_NDARRAYS
+
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
+        [
+            {"num_control_points": 5, "prob": 0.0},
+            {"img": p(np.arange(8).reshape((1, 2, 2, 2)))},
+            np.arange(8).reshape((1, 2, 2, 2)),
+        ]
+    )
+    TESTS.append(
+        [
+            {"num_control_points": 5, "prob": 0.9},
+            {"img": p(np.arange(8).reshape((1, 2, 2, 2)).astype(np.float32))},
+            np.array([[[[0.0, 0.57227867], [1.1391707, 1.68990281]], [[2.75833219, 4.34445884], [5.70913743, 7.0]]]]),
+        ]
+    )
+    TESTS.append(
+        [
+            {"num_control_points": (5, 20), "prob": 0.9},
+            {"img": p(np.arange(8).reshape((1, 2, 2, 2)).astype(np.float32))},
+            np.array([[[[0.0, 1.17472492], [2.21553091, 2.88292011]], [[3.98407301, 5.01302123], [6.09275004, 7.0]]]]),
+        ]
+    )
 
 
 class TestRandHistogramShift(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_rand_histogram_shift(self, input_param, input_data, expected_val):
         g = RandHistogramShift(**input_param)
         g.set_random_state(123)
diff --git a/tests/test_rand_histogram_shiftd.py b/tests/test_rand_histogram_shiftd.py
index 806e4f5cf2..2191e99518 100644
--- a/tests/test_rand_histogram_shiftd.py
+++ b/tests/test_rand_histogram_shiftd.py
@@ -14,47 +14,60 @@
 import numpy as np
 from parameterized import parameterized
 
-from monai.transforms import RandHistogramShiftD
-
-TEST_CASES = [
-    [
-        {"keys": ("img",), "num_control_points": 5, "prob": 0.0},
-        {"img": np.arange(8).reshape((1, 2, 2, 2)), "seg": np.ones(8).reshape((1, 2, 2, 2))},
-        {"img": np.arange(8).reshape((1, 2, 2, 2)), "seg": np.ones(8).reshape((1, 2, 2, 2))},
-    ],
-    [
-        {"keys": ("img",), "num_control_points": 5, "prob": 0.9},
-        {"img": np.arange(8).reshape((1, 2, 2, 2)).astype(np.float32), "seg": np.ones(8).reshape((1, 2, 2, 2))},
-        {
-            "img": np.array(
-                [[[[0.0, 0.57227867], [1.1391707, 1.68990281]], [[2.75833219, 4.34445884], [5.70913743, 7.0]]]]
-            ),
-            "seg": np.ones(8).reshape((1, 2, 2, 2)),
-        },
-    ],
-    [
-        {"keys": ("img",), "num_control_points": (5, 20), "prob": 0.9},
-        {"img": np.arange(8).reshape((1, 2, 2, 2)).astype(np.float32), "seg": np.ones(8).reshape((1, 2, 2, 2))},
-        {
-            "img": np.array(
-                [[[[0.0, 1.17472492], [2.21553091, 2.88292011]], [[3.98407301, 5.01302123], [6.09275004, 7.0]]]]
-            ),
-            "seg": np.ones(8).reshape((1, 2, 2, 2)),
-        },
-    ],
-]
+from monai.transforms.intensity.dictionary import RandHistogramShiftd
+from tests.utils import TEST_NDARRAYS, assert_allclose
+
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
+        [
+            {"keys": ("img",), "num_control_points": 5, "prob": 0.0},
+            {"img": p(np.arange(8).reshape((1, 2, 2, 2))), "seg": p(np.ones(8).reshape((1, 2, 2, 2)))},
+            {"img": np.arange(8).reshape((1, 2, 2, 2)), "seg": np.ones(8).reshape((1, 2, 2, 2))},
+        ]
+    )
+    TESTS.append(
+        [
+            {"keys": ("img",), "num_control_points": 5, "prob": 0.9},
+            {
+                "img": p(np.arange(8).reshape((1, 2, 2, 2)).astype(np.float32)),
+                "seg": p(np.ones(8).reshape((1, 2, 2, 2))),
+            },
+            {
+                "img": np.array(
+                    [[[[0.0, 0.57227867], [1.1391707, 1.68990281]], [[2.75833219, 4.34445884], [5.70913743, 7.0]]]]
+                ),
+                "seg": np.ones(8).reshape((1, 2, 2, 2)),
+            },
+        ]
+    )
+    TESTS.append(
+        [
+            {"keys": ("img",), "num_control_points": (5, 20), "prob": 0.9},
+            {
+                "img": p(np.arange(8).reshape((1, 2, 2, 2)).astype(np.float32)),
+                "seg": p(np.ones(8).reshape((1, 2, 2, 2))),
+            },
+            {
+                "img": np.array(
+                    [[[[0.0, 1.17472492], [2.21553091, 2.88292011]], [[3.98407301, 5.01302123], [6.09275004, 7.0]]]]
+                ),
+                "seg": np.ones(8).reshape((1, 2, 2, 2)),
+            },
+        ]
+    )
 
 
 class TestRandHistogramShiftD(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_rand_histogram_shiftd(self, input_param, input_data, expected_val):
-        g = RandHistogramShiftD(**input_param)
+        g = RandHistogramShiftd(**input_param)
         g.set_random_state(123)
         res = g(input_data)
         for key in res:
             result = res[key]
             expected = expected_val[key] if isinstance(expected_val, dict) else expected_val
-            np.testing.assert_allclose(result, expected, rtol=1e-4, atol=1e-4)
+            assert_allclose(result, expected, rtol=1e-4, atol=1e-4, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_k_space_spike_noise.py b/tests/test_rand_k_space_spike_noise.py
index 71f7e36d9b..1c9ca9c1d5 100644
--- a/tests/test_rand_k_space_spike_noise.py
+++ b/tests/test_rand_k_space_spike_noise.py
@@ -19,18 +19,15 @@
 from monai.data.synthetic import create_test_image_2d, create_test_image_3d
 from monai.transforms import KSpaceSpikeNoise, RandKSpaceSpikeNoise
 from monai.utils.misc import set_determinism
-from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule
+from tests.utils import TEST_NDARRAYS
 
-TEST_CASES = []
+TESTS = []
 for shape in ((128, 64), (64, 48, 80)):
-    for as_tensor_output in (True, False):
-        for as_tensor_input in (True, False):
-            for channel_wise in (True, False):
-                TEST_CASES.append((shape, as_tensor_output, as_tensor_input, channel_wise))
+    for p in TEST_NDARRAYS:
+        for channel_wise in (True, False):
+            TESTS.append((shape, p, channel_wise))
 
 
-@SkipIfBeforePyTorchVersion((1, 8))
-@SkipIfNoModule("torch.fft")
 class TestRandKSpaceSpikeNoise(unittest.TestCase):
     def setUp(self):
         set_determinism(0)
@@ -40,44 +37,55 @@ def tearDown(self):
         set_determinism(None)
 
     @staticmethod
-    def get_data(im_shape, as_tensor_input):
+    def get_data(im_shape, im_type):
         create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d
         im = create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5)[0][None]
-        return torch.Tensor(im) if as_tensor_input else im
+        return im_type(im)
 
-    @parameterized.expand(TEST_CASES)
-    def test_0_prob(self, im_shape, as_tensor_output, as_tensor_input, channel_wise):
-        im = self.get_data(im_shape, as_tensor_input)
+    @parameterized.expand(TESTS)
+    def test_0_prob(self, im_shape, im_type, channel_wise):
+        im = self.get_data(im_shape, im_type)
         intensity_range = [14, 15]
-        t = RandKSpaceSpikeNoise(0.0, intensity_range, channel_wise, as_tensor_output)
+        t = RandKSpaceSpikeNoise(0.0, intensity_range, channel_wise)
         out = t(im)
+        self.assertEqual(type(im), type(out))
+        if isinstance(out, torch.Tensor):
+            self.assertEqual(out.device, im.device)
+            im, out = im.cpu(), out.cpu()
         np.testing.assert_allclose(im, out)
 
-    @parameterized.expand(TEST_CASES)
-    def test_1_prob(self, im_shape, as_tensor_output, as_tensor_input, channel_wise):
-        im = self.get_data(im_shape, as_tensor_input)
+    @parameterized.expand(TESTS)
+    def test_1_prob(self, im_shape, im_type, channel_wise):
+        im = self.get_data(im_shape, im_type)
         intensity_range = [14, 14]
-        t = RandKSpaceSpikeNoise(1.0, intensity_range, channel_wise, as_tensor_output)
+        t = RandKSpaceSpikeNoise(1.0, intensity_range, channel_wise)
         out = t(im)
-        base_t = KSpaceSpikeNoise(t.sampled_locs, [14], as_tensor_output)
+        base_t = KSpaceSpikeNoise(t.sampled_locs, [14])
         out = out - base_t(im)
+        self.assertEqual(type(im), type(out))
+        if isinstance(out, torch.Tensor):
+            self.assertEqual(out.device, im.device)
+            im, out = im.cpu(), out.cpu()
         np.testing.assert_allclose(out, im * 0)
 
-    @parameterized.expand(TEST_CASES)
-    def test_same_result(self, im_shape, as_tensor_output, as_tensor_input, channel_wise):
-        im = self.get_data(im_shape, as_tensor_input)
+    @parameterized.expand(TESTS)
+    def test_same_result(self, im_shape, im_type, channel_wise):
+        im = self.get_data(im_shape, im_type)
         intensity_range = [14, 15]
-        t = RandKSpaceSpikeNoise(0.0, intensity_range, channel_wise, as_tensor_output)
+        t = RandKSpaceSpikeNoise(0.0, intensity_range, channel_wise)
         t.set_random_state(42)
         out1 = t(deepcopy(im))
         t.set_random_state(42)
         out2 = t(deepcopy(im))
+        self.assertEqual(type(im), type(out1))
+        if isinstance(out1, torch.Tensor):
+            self.assertEqual(out1.device, im.device)
+            out1, out2 = out1.cpu(), out2.cpu()
         np.testing.assert_allclose(out1, out2)
-        self.assertIsInstance(out1, torch.Tensor if as_tensor_output else np.ndarray)
 
-    @parameterized.expand(TEST_CASES)
-    def test_intensity(self, im_shape, _, as_tensor_input, channel_wise):
-        im = self.get_data(im_shape, as_tensor_input)
+    @parameterized.expand(TESTS)
+    def test_intensity(self, im_shape, im_type, channel_wise):
+        im = self.get_data(im_shape, im_type)
         intensity_range = [14, 14.1]
         t = RandKSpaceSpikeNoise(1.0, intensity_range, channel_wise)
         _ = t(deepcopy(im))
diff --git a/tests/test_rand_k_space_spike_noised.py b/tests/test_rand_k_space_spike_noised.py
index 1056ebf163..869aa50872 100644
--- a/tests/test_rand_k_space_spike_noised.py
+++ b/tests/test_rand_k_space_spike_noised.py
@@ -19,19 +19,16 @@
 from monai.data.synthetic import create_test_image_2d, create_test_image_3d
 from monai.transforms import RandKSpaceSpikeNoised
 from monai.utils.misc import set_determinism
-from tests.utils import SkipIfBeforePyTorchVersion, SkipIfNoModule
+from tests.utils import TEST_NDARRAYS
 
-TEST_CASES = []
+TESTS = []
 for shape in ((128, 64), (64, 48, 80)):
-    for as_tensor_output in (True, False):
-        for as_tensor_input in (True, False):
-            TEST_CASES.append((shape, as_tensor_output, as_tensor_input))
+    for p in TEST_NDARRAYS:
+        TESTS.append((shape, p))
 
 KEYS = ["image", "label"]
 
 
-@SkipIfBeforePyTorchVersion((1, 8))
-@SkipIfNoModule("torch.fft")
 class TestKSpaceSpikeNoised(unittest.TestCase):
     def setUp(self):
         set_determinism(0)
@@ -41,17 +38,16 @@ def tearDown(self):
         set_determinism(None)
 
     @staticmethod
-    def get_data(im_shape, as_tensor_input):
+    def get_data(im_shape, im_type):
         create_test_image = create_test_image_2d if len(im_shape) == 2 else create_test_image_3d
         ims = create_test_image(*im_shape, rad_max=20, noise_max=0.0, num_seg_classes=5)
-        ims = [im[None] for im in ims]
-        ims = [torch.Tensor(im) for im in ims] if as_tensor_input else ims
-        return dict(zip(KEYS, ims))
+        ims = [im_type(im[None]) for im in ims]
+        return {k: v for k, v in zip(KEYS, ims)}
 
-    @parameterized.expand(TEST_CASES)
-    def test_same_result(self, im_shape, as_tensor_output, as_tensor_input):
+    @parameterized.expand(TESTS)
+    def test_same_result(self, im_shape, im_type):
 
-        data = self.get_data(im_shape, as_tensor_input)
+        data = self.get_data(im_shape, im_type)
 
         intensity_ranges = {"image": (13, 15), "label": (13, 15)}
         t = RandKSpaceSpikeNoised(
@@ -60,7 +56,6 @@ def test_same_result(self, im_shape, as_tensor_output, as_tensor_input):
             prob=1.0,
             intensity_ranges=intensity_ranges,
             channel_wise=True,
-            as_tensor_output=as_tensor_output,
         )
         t.set_rand_state(42)
         out1 = t(deepcopy(data))
@@ -69,12 +64,16 @@ def test_same_result(self, im_shape, as_tensor_output, as_tensor_input):
         out2 = t(deepcopy(data))
 
         for k in KEYS:
+            self.assertEqual(type(out1[k]), type(data[k]))
+            if isinstance(out1[k], torch.Tensor):
+                self.assertEqual(out1[k].device, data[k].device)
+                out1[k] = out1[k].cpu()
+                out2[k] = out2[k].cpu()
             np.testing.assert_allclose(out1[k], out2[k], atol=1e-10)
-            self.assertIsInstance(out1[k], torch.Tensor if as_tensor_output else np.ndarray)
 
-    @parameterized.expand(TEST_CASES)
-    def test_0_prob(self, im_shape, as_tensor_output, as_tensor_input):
-        data = self.get_data(im_shape, as_tensor_input)
+    @parameterized.expand(TESTS)
+    def test_0_prob(self, im_shape, im_type):
+        data = self.get_data(im_shape, im_type)
         intensity_ranges = {"image": (13, 15), "label": (13, 15)}
         t1 = RandKSpaceSpikeNoised(
             KEYS,
@@ -82,7 +81,6 @@ def test_0_prob(self, im_shape, as_tensor_output, as_tensor_input):
             prob=1.0,
             intensity_ranges=intensity_ranges,
             channel_wise=True,
-            as_tensor_output=as_tensor_output,
         )
 
         t2 = RandKSpaceSpikeNoised(
@@ -91,19 +89,25 @@ def test_0_prob(self, im_shape, as_tensor_output, as_tensor_input):
             prob=1.0,
             intensity_ranges=intensity_ranges,
             channel_wise=True,
-            as_tensor_output=as_tensor_output,
         )
         out1 = t1(data)
         out2 = t2(data)
 
         for k in KEYS:
+            self.assertEqual(type(out1[k]), type(data[k]))
+            if isinstance(out1[k], torch.Tensor):
+                self.assertEqual(out1[k].device, data[k].device)
+                out1[k] = out1[k].cpu()
+                out2[k] = out2[k].cpu()
+                data[k] = data[k].cpu()
+
             np.testing.assert_allclose(data[k], out1[k])
             np.testing.assert_allclose(data[k], out2[k])
 
-    @parameterized.expand(TEST_CASES)
-    def test_intensity(self, im_shape, as_tensor_output, as_tensor_input):
+    @parameterized.expand(TESTS)
+    def test_intensity(self, im_shape, im_type):
 
-        data = self.get_data(im_shape, as_tensor_input)
+        data = self.get_data(im_shape, im_type)
         intensity_ranges = {"image": (13, 13.1), "label": (13, 13.1)}
         t = RandKSpaceSpikeNoised(
             KEYS,
@@ -111,7 +115,6 @@ def test_intensity(self, im_shape, as_tensor_output, as_tensor_input):
             prob=1.0,
             intensity_ranges=intensity_ranges,
             channel_wise=True,
-            as_tensor_output=True,
         )
 
         _ = t(data)
@@ -120,9 +123,9 @@ def test_intensity(self, im_shape, as_tensor_output, as_tensor_input):
         self.assertGreaterEqual(t.transforms["label"].sampled_k_intensity[0], 13)
         self.assertLessEqual(t.transforms["label"].sampled_k_intensity[0], 13.1)
 
-    @parameterized.expand(TEST_CASES)
-    def test_same_transformation(self, im_shape, _, as_tensor_input):
-        data = self.get_data(im_shape, as_tensor_input)
+    @parameterized.expand(TESTS)
+    def test_same_transformation(self, im_shape, im_type):
+        data = self.get_data(im_shape, im_type)
         # use same image for both dictionary entries to check same trans is applied to them
         data = {KEYS[0]: deepcopy(data[KEYS[0]]), KEYS[1]: deepcopy(data[KEYS[0]])}
 
@@ -135,11 +138,16 @@ def test_same_transformation(self, im_shape, _, as_tensor_input):
             intensity_ranges=intensity_ranges,
             channel_wise=True,
             common_sampling=True,
-            as_tensor_output=True,
         )
 
         out = t(deepcopy(data))
 
+        for k in KEYS:
+            self.assertEqual(type(out[k]), type(data[k]))
+            if isinstance(out[k], torch.Tensor):
+                self.assertEqual(out[k].device, data[k].device)
+                out[k] = out[k].cpu()
+
         np.testing.assert_allclose(out[KEYS[0]], out[KEYS[1]])
 
 
diff --git a/tests/test_rand_rotate.py b/tests/test_rand_rotate.py
index 0ff8508a0f..4817e81735 100644
--- a/tests/test_rand_rotate.py
+++ b/tests/test_rand_rotate.py
@@ -10,25 +10,60 @@
 # limitations under the License.
 
 import unittest
+from typing import List, Tuple
 
 import numpy as np
 import scipy.ndimage
+import torch
 from parameterized import parameterized
 
 from monai.transforms import RandRotate
-from tests.utils import NumpyImageTestCase2D, NumpyImageTestCase3D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, NumpyImageTestCase3D
 
+TEST_CASES_2D: List[Tuple] = []
+for p in TEST_NDARRAYS:
+    TEST_CASES_2D.append((p, np.pi / 2, True, "bilinear", "border", False))
+    TEST_CASES_2D.append((p, np.pi / 4, True, "nearest", "border", False))
+    TEST_CASES_2D.append((p, np.pi, False, "nearest", "zeros", True))
+    TEST_CASES_2D.append((p, (-np.pi / 4, 0), False, "nearest", "zeros", True))
 
-class TestRandRotate2D(NumpyImageTestCase2D):
-    @parameterized.expand(
-        [
-            (np.pi / 2, True, "bilinear", "border", False),
-            (np.pi / 4, True, "nearest", "border", False),
-            (np.pi, False, "nearest", "zeros", True),
-            ((-np.pi / 4, 0), False, "nearest", "zeros", True),
-        ]
+TEST_CASES_3D: List[Tuple] = []
+for p in TEST_NDARRAYS:
+    TEST_CASES_3D.append(
+        (p, np.pi / 2, -np.pi / 6, (0.0, np.pi), False, "bilinear", "border", False, (1, 87, 104, 109))
+    )
+    TEST_CASES_3D.append(
+        (
+            p,
+            np.pi / 4,
+            (-np.pi / 9, np.pi / 4.5),
+            (np.pi / 9, np.pi / 6),
+            False,
+            "nearest",
+            "border",
+            True,
+            (1, 89, 105, 104),
+        )
+    )
+    TEST_CASES_3D.append(
+        (
+            p,
+            0.0,
+            (2 * np.pi, 2.06 * np.pi),
+            (-np.pi / 180, np.pi / 180),
+            True,
+            "nearest",
+            "zeros",
+            True,
+            (1, 48, 64, 80),
+        )
     )
-    def test_correct_results(self, degrees, keep_size, mode, padding_mode, align_corners):
+    TEST_CASES_3D.append((p, (-np.pi / 4, 0), 0, 0, False, "nearest", "zeros", False, (1, 48, 77, 90)))
+
+
+class TestRandRotate2D(NumpyImageTestCase2D):
+    @parameterized.expand(TEST_CASES_2D)
+    def test_correct_results(self, im_type, degrees, keep_size, mode, padding_mode, align_corners):
         rotate_fn = RandRotate(
             range_x=degrees,
             prob=1.0,
@@ -38,7 +73,7 @@ def test_correct_results(self, degrees, keep_size, mode, padding_mode, align_cor
             align_corners=align_corners,
         )
         rotate_fn.set_random_state(243)
-        rotated = rotate_fn(self.imt[0])
+        rotated = rotate_fn(im_type(self.imt[0]))
 
         _order = 0 if mode == "nearest" else 1
         if mode == "border":
@@ -52,38 +87,14 @@ def test_correct_results(self, degrees, keep_size, mode, padding_mode, align_cor
             self.imt[0, 0], -np.rad2deg(angle), (0, 1), not keep_size, order=_order, mode=_mode, prefilter=False
         )
         expected = np.stack(expected).astype(np.float32)
+        rotated = rotated.cpu() if isinstance(rotated, torch.Tensor) else rotated
         good = np.sum(np.isclose(expected, rotated[0], atol=1e-3))
         self.assertLessEqual(np.abs(good - expected.size), 5, "diff at most 5 pixels")
 
 
 class TestRandRotate3D(NumpyImageTestCase3D):
-    @parameterized.expand(
-        [
-            (np.pi / 2, -np.pi / 6, (0.0, np.pi), False, "bilinear", "border", False, (1, 87, 104, 109)),
-            (
-                np.pi / 4,
-                (-np.pi / 9, np.pi / 4.5),
-                (np.pi / 9, np.pi / 6),
-                False,
-                "nearest",
-                "border",
-                True,
-                (1, 89, 105, 104),
-            ),
-            (
-                0.0,
-                (2 * np.pi, 2.06 * np.pi),
-                (-np.pi / 180, np.pi / 180),
-                True,
-                "nearest",
-                "zeros",
-                True,
-                (1, 48, 64, 80),
-            ),
-            ((-np.pi / 4, 0), 0, 0, False, "nearest", "zeros", False, (1, 48, 77, 90)),
-        ]
-    )
-    def test_correct_results(self, x, y, z, keep_size, mode, padding_mode, align_corners, expected):
+    @parameterized.expand(TEST_CASES_3D)
+    def test_correct_results(self, im_type, x, y, z, keep_size, mode, padding_mode, align_corners, expected):
         rotate_fn = RandRotate(
             range_x=x,
             range_y=y,
@@ -95,8 +106,8 @@ def test_correct_results(self, x, y, z, keep_size, mode, padding_mode, align_cor
             align_corners=align_corners,
         )
         rotate_fn.set_random_state(243)
-        rotated = rotate_fn(self.imt[0])
-        np.testing.assert_allclose(rotated.shape, expected)
+        rotated = rotate_fn(im_type(self.imt[0]))
+        torch.testing.assert_allclose(rotated.shape, expected, rtol=1e-7, atol=0)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_rotate90.py b/tests/test_rand_rotate90.py
index 50a1b28e53..9fc025fbbe 100644
--- a/tests/test_rand_rotate90.py
+++ b/tests/test_rand_rotate90.py
@@ -14,49 +14,45 @@
 import numpy as np
 
 from monai.transforms import RandRotate90
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 
 class TestRandRotate90(NumpyImageTestCase2D):
     def test_default(self):
         rotate = RandRotate90()
-        rotate.set_random_state(123)
-        rotated = rotate(self.imt[0])
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 0, (0, 1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated, expected))
+        for p in TEST_NDARRAYS:
+            rotate.set_random_state(123)
+            rotated = rotate(p(self.imt[0]))
+            expected = [np.rot90(channel, 0, (0, 1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8)
 
     def test_k(self):
         rotate = RandRotate90(max_k=2)
-        rotate.set_random_state(234)
-        rotated = rotate(self.imt[0])
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 0, (0, 1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated, expected))
+        for p in TEST_NDARRAYS:
+            rotate.set_random_state(234)
+            rotated = rotate(p(self.imt[0]))
+            expected = [np.rot90(channel, 0, (0, 1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8)
 
     def test_spatial_axes(self):
         rotate = RandRotate90(spatial_axes=(0, 1))
-        rotate.set_random_state(234)
-        rotated = rotate(self.imt[0])
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 0, (0, 1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated, expected))
+        for p in TEST_NDARRAYS:
+            rotate.set_random_state(234)
+            rotated = rotate(p(self.imt[0]))
+            expected = [np.rot90(channel, 0, (0, 1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8)
 
     def test_prob_k_spatial_axes(self):
         rotate = RandRotate90(prob=1.0, max_k=2, spatial_axes=(0, 1))
-        rotate.set_random_state(234)
-        rotated = rotate(self.imt[0])
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 1, (0, 1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated, expected))
+        for p in TEST_NDARRAYS:
+            rotate.set_random_state(234)
+            rotated = rotate(p(self.imt[0]))
+            expected = [np.rot90(channel, 1, (0, 1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_rotate90d.py b/tests/test_rand_rotate90d.py
index a487b695f5..3071aa82c8 100644
--- a/tests/test_rand_rotate90d.py
+++ b/tests/test_rand_rotate90d.py
@@ -14,53 +14,49 @@
 import numpy as np
 
 from monai.transforms import RandRotate90d
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 
 class TestRandRotate90d(NumpyImageTestCase2D):
     def test_default(self):
         key = None
         rotate = RandRotate90d(keys=key)
-        rotate.set_random_state(123)
-        rotated = rotate({key: self.imt[0]})
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 0, (0, 1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated[key], expected))
+        for p in TEST_NDARRAYS:
+            rotate.set_random_state(123)
+            rotated = rotate({key: p(self.imt[0])})
+            expected = [np.rot90(channel, 0, (0, 1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated[key], p(expected))
 
     def test_k(self):
         key = "test"
         rotate = RandRotate90d(keys=key, max_k=2)
-        rotate.set_random_state(234)
-        rotated = rotate({key: self.imt[0]})
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 0, (0, 1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated[key], expected))
+        for p in TEST_NDARRAYS:
+            rotate.set_random_state(234)
+            rotated = rotate({key: p(self.imt[0])})
+            expected = [np.rot90(channel, 0, (0, 1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated[key], p(expected))
 
     def test_spatial_axes(self):
         key = "test"
         rotate = RandRotate90d(keys=key, spatial_axes=(0, 1))
-        rotate.set_random_state(234)
-        rotated = rotate({key: self.imt[0]})
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 0, (0, 1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated[key], expected))
+        for p in TEST_NDARRAYS:
+            rotate.set_random_state(234)
+            rotated = rotate({key: p(self.imt[0])})
+            expected = [np.rot90(channel, 0, (0, 1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated[key], p(expected))
 
     def test_prob_k_spatial_axes(self):
         key = "test"
         rotate = RandRotate90d(keys=key, prob=1.0, max_k=2, spatial_axes=(0, 1))
-        rotate.set_random_state(234)
-        rotated = rotate({key: self.imt[0]})
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 1, (0, 1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated[key], expected))
+        for p in TEST_NDARRAYS:
+            rotate.set_random_state(234)
+            rotated = rotate({key: p(self.imt[0])})
+            expected = [np.rot90(channel, 1, (0, 1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated[key], p(expected))
 
     def test_no_key(self):
         key = "unknown"
diff --git a/tests/test_rand_rotated.py b/tests/test_rand_rotated.py
index 47b4b7107e..4c9a27f668 100644
--- a/tests/test_rand_rotated.py
+++ b/tests/test_rand_rotated.py
@@ -10,26 +10,104 @@
 # limitations under the License.
 
 import unittest
+from typing import List, Tuple
 
 import numpy as np
 import scipy.ndimage
+import torch
 from parameterized import parameterized
 
 from monai.transforms import RandRotated
 from monai.utils import GridSampleMode, GridSamplePadMode
-from tests.utils import NumpyImageTestCase2D, NumpyImageTestCase3D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, NumpyImageTestCase3D
 
+TEST_CASES_2D: List[Tuple] = []
+for p in TEST_NDARRAYS:
+    TEST_CASES_2D.append((p, np.pi / 2, True, "bilinear", "border", False))
+    TEST_CASES_2D.append((p, np.pi / 4, True, "nearest", "border", False))
+    TEST_CASES_2D.append((p, np.pi, False, "nearest", "zeros", True))
+    TEST_CASES_2D.append((p, (-np.pi / 4, 0), False, "nearest", "zeros", True))
 
-class TestRandRotated2D(NumpyImageTestCase2D):
-    @parameterized.expand(
-        [
-            (np.pi / 2, True, "bilinear", "border", False),
-            (np.pi / 4, True, "nearest", "border", False),
-            (np.pi, False, "nearest", "zeros", True),
-            ((-np.pi / 4, 0), False, "nearest", "zeros", True),
-        ]
+
+TEST_CASES_3D: List[Tuple] = []
+for p in TEST_NDARRAYS:
+    TEST_CASES_3D.append(
+        (p, np.pi / 2, -np.pi / 6, (0.0, np.pi), False, "bilinear", "border", False, (1, 87, 104, 109))
     )
-    def test_correct_results(self, degrees, keep_size, mode, padding_mode, align_corners):
+    TEST_CASES_3D.append(
+        (
+            p,
+            np.pi / 2,
+            -np.pi / 6,
+            (0.0, np.pi),
+            False,
+            GridSampleMode.NEAREST,
+            GridSamplePadMode.BORDER,
+            False,
+            (1, 87, 104, 109),
+        )
+    )
+    TEST_CASES_3D.append(
+        (
+            p,
+            np.pi / 4,
+            (-np.pi / 9, np.pi / 4.5),
+            (np.pi / 9, np.pi / 6),
+            False,
+            "nearest",
+            "border",
+            True,
+            (1, 89, 105, 104),
+        )
+    )
+    TEST_CASES_3D.append(
+        (
+            p,
+            np.pi / 4,
+            (-np.pi / 9, np.pi / 4.5),
+            (np.pi / 9, np.pi / 6),
+            False,
+            GridSampleMode.NEAREST,
+            GridSamplePadMode.BORDER,
+            True,
+            (1, 89, 105, 104),
+        )
+    )
+    TEST_CASES_3D.append(
+        (
+            p,
+            0.0,
+            (2 * np.pi, 2.06 * np.pi),
+            (-np.pi / 180, np.pi / 180),
+            True,
+            "nearest",
+            "zeros",
+            True,
+            (1, 48, 64, 80),
+        )
+    )
+    TEST_CASES_3D.append(
+        (
+            p,
+            0.0,
+            (2 * np.pi, 2.06 * np.pi),
+            (-np.pi / 180, np.pi / 180),
+            True,
+            GridSampleMode.NEAREST,
+            GridSamplePadMode.ZEROS,
+            True,
+            (1, 48, 64, 80),
+        )
+    )
+    TEST_CASES_3D.append((p, (-np.pi / 4, 0), 0, 0, False, "nearest", "zeros", False, (1, 48, 77, 90)))
+    TEST_CASES_3D.append(
+        (p, (-np.pi / 4, 0), 0, 0, False, GridSampleMode.NEAREST, GridSamplePadMode.ZEROS, False, (1, 48, 77, 90))
+    )
+
+
+class TestRandRotated2D(NumpyImageTestCase2D):
+    @parameterized.expand(TEST_CASES_2D)
+    def test_correct_results(self, im_type, degrees, keep_size, mode, padding_mode, align_corners):
         rotate_fn = RandRotated(
             "img",
             range_x=degrees,
@@ -40,7 +118,7 @@ def test_correct_results(self, degrees, keep_size, mode, padding_mode, align_cor
             align_corners=align_corners,
         )
         rotate_fn.set_random_state(243)
-        rotated = rotate_fn({"img": self.imt[0], "seg": self.segn[0]})
+        rotated = rotate_fn({"img": im_type(self.imt[0]), "seg": im_type(self.segn[0])})
 
         _order = 0 if mode == "nearest" else 1
         if padding_mode == "border":
@@ -53,70 +131,16 @@ def test_correct_results(self, degrees, keep_size, mode, padding_mode, align_cor
         expected = scipy.ndimage.rotate(
             self.imt[0, 0], -np.rad2deg(angle), (0, 1), not keep_size, order=_order, mode=_mode, prefilter=False
         )
+        for k, v in rotated.items():
+            rotated[k] = v.cpu() if isinstance(v, torch.Tensor) else v
         expected = np.stack(expected).astype(np.float32)
         good = np.sum(np.isclose(expected, rotated["img"][0], atol=1e-3))
         self.assertLessEqual(np.abs(good - expected.size), 5, "diff at most 5 pixels")
 
 
 class TestRandRotated3D(NumpyImageTestCase3D):
-    @parameterized.expand(
-        [
-            (np.pi / 2, -np.pi / 6, (0.0, np.pi), False, "bilinear", "border", False, (1, 87, 104, 109)),
-            (
-                np.pi / 2,
-                -np.pi / 6,
-                (0.0, np.pi),
-                False,
-                GridSampleMode.NEAREST,
-                GridSamplePadMode.BORDER,
-                False,
-                (1, 87, 104, 109),
-            ),
-            (
-                np.pi / 4,
-                (-np.pi / 9, np.pi / 4.5),
-                (np.pi / 9, np.pi / 6),
-                False,
-                "nearest",
-                "border",
-                True,
-                (1, 89, 105, 104),
-            ),
-            (
-                np.pi / 4,
-                (-np.pi / 9, np.pi / 4.5),
-                (np.pi / 9, np.pi / 6),
-                False,
-                GridSampleMode.NEAREST,
-                GridSamplePadMode.BORDER,
-                True,
-                (1, 89, 105, 104),
-            ),
-            (
-                0.0,
-                (2 * np.pi, 2.06 * np.pi),
-                (-np.pi / 180, np.pi / 180),
-                True,
-                "nearest",
-                "zeros",
-                True,
-                (1, 48, 64, 80),
-            ),
-            (
-                0.0,
-                (2 * np.pi, 2.06 * np.pi),
-                (-np.pi / 180, np.pi / 180),
-                True,
-                GridSampleMode.NEAREST,
-                GridSamplePadMode.ZEROS,
-                True,
-                (1, 48, 64, 80),
-            ),
-            ((-np.pi / 4, 0), 0, 0, False, "nearest", "zeros", False, (1, 48, 77, 90)),
-            ((-np.pi / 4, 0), 0, 0, False, GridSampleMode.NEAREST, GridSamplePadMode.ZEROS, False, (1, 48, 77, 90)),
-        ]
-    )
-    def test_correct_shapes(self, x, y, z, keep_size, mode, padding_mode, align_corners, expected):
+    @parameterized.expand(TEST_CASES_3D)
+    def test_correct_shapes(self, im_type, x, y, z, keep_size, mode, padding_mode, align_corners, expected):
         rotate_fn = RandRotated(
             "img",
             range_x=x,
@@ -129,7 +153,7 @@ def test_correct_shapes(self, x, y, z, keep_size, mode, padding_mode, align_corn
             align_corners=align_corners,
         )
         rotate_fn.set_random_state(243)
-        rotated = rotate_fn({"img": self.imt[0], "seg": self.segn[0]})
+        rotated = rotate_fn({"img": im_type(self.imt[0]), "seg": im_type(self.segn[0])})
         np.testing.assert_allclose(rotated["img"].shape, expected)
 
 
diff --git a/tests/test_rand_scale_crop.py b/tests/test_rand_scale_crop.py
index db5487ebff..a0c5471ffb 100644
--- a/tests/test_rand_scale_crop.py
+++ b/tests/test_rand_scale_crop.py
@@ -15,6 +15,7 @@
 from parameterized import parameterized
 
 from monai.transforms import RandScaleCrop
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
 TEST_CASE_1 = [
     {"roi_scale": [1.0, 1.0, -1.0], "random_center": True},
@@ -55,22 +56,25 @@
 class TestRandScaleCrop(unittest.TestCase):
     @parameterized.expand([TEST_CASE_1, TEST_CASE_2])
     def test_shape(self, input_param, input_data, expected_shape):
-        result = RandScaleCrop(**input_param)(input_data)
-        self.assertTupleEqual(result.shape, expected_shape)
+        for p in TEST_NDARRAYS:
+            result = RandScaleCrop(**input_param)(p(input_data))
+            self.assertTupleEqual(result.shape, expected_shape)
 
     @parameterized.expand([TEST_CASE_3])
     def test_value(self, input_param, input_data):
-        cropper = RandScaleCrop(**input_param)
-        result = cropper(input_data)
-        roi = [(2 - i // 2, 2 + i - i // 2) for i in cropper._size]
-        np.testing.assert_allclose(result, input_data[:, roi[0][0] : roi[0][1], roi[1][0] : roi[1][1]])
+        for p in TEST_NDARRAYS:
+            cropper = RandScaleCrop(**input_param)
+            result = cropper(p(input_data))
+            roi = [(2 - i // 2, 2 + i - i // 2) for i in cropper._size]
+            assert_allclose(result, input_data[:, roi[0][0] : roi[0][1], roi[1][0] : roi[1][1]], type_test=False)
 
     @parameterized.expand([TEST_CASE_4, TEST_CASE_5, TEST_CASE_6])
     def test_random_shape(self, input_param, input_data, expected_shape):
-        cropper = RandScaleCrop(**input_param)
-        cropper.set_random_state(seed=123)
-        result = cropper(input_data)
-        self.assertTupleEqual(result.shape, expected_shape)
+        for p in TEST_NDARRAYS:
+            cropper = RandScaleCrop(**input_param)
+            cropper.set_random_state(seed=123)
+            result = cropper(p(input_data))
+            self.assertTupleEqual(result.shape, expected_shape)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_scale_cropd.py b/tests/test_rand_scale_cropd.py
index 265c6c467d..f78a81d339 100644
--- a/tests/test_rand_scale_cropd.py
+++ b/tests/test_rand_scale_cropd.py
@@ -15,6 +15,7 @@
 from parameterized import parameterized
 
 from monai.transforms import RandScaleCropd
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
 TEST_CASE_1 = [
     {"keys": "img", "roi_scale": [1.0, 1.0, -1.0], "random_center": True},
@@ -66,10 +67,14 @@ def test_shape(self, input_param, input_data, expected_shape):
 
     @parameterized.expand([TEST_CASE_3])
     def test_value(self, input_param, input_data):
-        cropper = RandScaleCropd(**input_param)
-        result = cropper(input_data)
-        roi = [(2 - i // 2, 2 + i - i // 2) for i in cropper._size]
-        np.testing.assert_allclose(result["img"], input_data["img"][:, roi[0][0] : roi[0][1], roi[1][0] : roi[1][1]])
+        for p in TEST_NDARRAYS:
+            cropper = RandScaleCropd(**input_param)
+            input_data["img"] = p(input_data["img"])
+            result = cropper(input_data)
+            roi = [(2 - i // 2, 2 + i - i // 2) for i in cropper._size]
+            assert_allclose(
+                result["img"], input_data["img"][:, roi[0][0] : roi[0][1], roi[1][0] : roi[1][1]], type_test=False
+            )
 
     @parameterized.expand([TEST_CASE_4, TEST_CASE_5, TEST_CASE_6])
     def test_random_shape(self, input_param, input_data, expected_shape):
diff --git a/tests/test_rand_scale_intensity.py b/tests/test_rand_scale_intensity.py
index 750d88bfad..b863e2f874 100644
--- a/tests/test_rand_scale_intensity.py
+++ b/tests/test_rand_scale_intensity.py
@@ -25,7 +25,7 @@ def test_value(self):
             result = scaler(p(self.imt))
             np.random.seed(0)
             expected = p((self.imt * (1 + np.random.uniform(low=-0.5, high=0.5))).astype(np.float32))
-            assert_allclose(result, expected, rtol=1e-7, atol=0)
+            assert_allclose(result, p(expected), rtol=1e-7, atol=0)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_scale_intensityd.py b/tests/test_rand_scale_intensityd.py
index a8d2e63f65..fdcbd7146a 100644
--- a/tests/test_rand_scale_intensityd.py
+++ b/tests/test_rand_scale_intensityd.py
@@ -19,14 +19,14 @@
 
 class TestRandScaleIntensityd(NumpyImageTestCase2D):
     def test_value(self):
+        key = "img"
         for p in TEST_NDARRAYS:
-            key = "img"
             scaler = RandScaleIntensityd(keys=[key], factors=0.5, prob=1.0)
             scaler.set_random_state(seed=0)
             result = scaler({key: p(self.imt)})
             np.random.seed(0)
             expected = (self.imt * (1 + np.random.uniform(low=-0.5, high=0.5))).astype(np.float32)
-            assert_allclose(result[key], expected)
+            assert_allclose(result[key], p(expected))
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_shift_intensityd.py b/tests/test_rand_shift_intensityd.py
index 6766236146..c5dfb66722 100644
--- a/tests/test_rand_shift_intensityd.py
+++ b/tests/test_rand_shift_intensityd.py
@@ -19,14 +19,14 @@
 
 class TestRandShiftIntensityd(NumpyImageTestCase2D):
     def test_value(self):
+        key = "img"
         for p in TEST_NDARRAYS:
-            key = "img"
             shifter = RandShiftIntensityd(keys=[key], offsets=1.0, prob=1.0)
             shifter.set_random_state(seed=0)
             result = shifter({key: p(self.imt)})
             np.random.seed(0)
             expected = self.imt + np.random.uniform(low=-1.0, high=1.0)
-            assert_allclose(result[key], expected)
+            assert_allclose(result[key], p(expected))
 
     def test_factor(self):
         key = "img"
diff --git a/tests/test_rand_spatial_crop.py b/tests/test_rand_spatial_crop.py
index 01e057e589..19b1841c6d 100644
--- a/tests/test_rand_spatial_crop.py
+++ b/tests/test_rand_spatial_crop.py
@@ -15,6 +15,7 @@
 from parameterized import parameterized
 
 from monai.transforms import RandSpatialCrop
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
 TEST_CASE_0 = [
     {"roi_size": [3, 3, -1], "random_center": True},
@@ -56,10 +57,11 @@ def test_shape(self, input_param, input_data, expected_shape):
 
     @parameterized.expand([TEST_CASE_3])
     def test_value(self, input_param, input_data):
-        cropper = RandSpatialCrop(**input_param)
-        result = cropper(input_data)
-        roi = [(2 - i // 2, 2 + i - i // 2) for i in cropper._size]
-        np.testing.assert_allclose(result, input_data[:, roi[0][0] : roi[0][1], roi[1][0] : roi[1][1]])
+        for p in TEST_NDARRAYS:
+            cropper = RandSpatialCrop(**input_param)
+            result = cropper(p(input_data))
+            roi = [(2 - i // 2, 2 + i - i // 2) for i in cropper._size]
+            assert_allclose(result, input_data[:, roi[0][0] : roi[0][1], roi[1][0] : roi[1][1]], type_test=False)
 
     @parameterized.expand([TEST_CASE_4, TEST_CASE_5])
     def test_random_shape(self, input_param, input_data, expected_shape):
diff --git a/tests/test_rand_spatial_crop_samples.py b/tests/test_rand_spatial_crop_samples.py
index 0ade9bbbba..eefe7d0e0a 100644
--- a/tests/test_rand_spatial_crop_samples.py
+++ b/tests/test_rand_spatial_crop_samples.py
@@ -15,6 +15,7 @@
 from parameterized import parameterized
 
 from monai.transforms import RandSpatialCropSamples
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
 TEST_CASE_1 = [
     {"roi_size": [3, 3, 3], "num_samples": 4, "random_center": True, "random_size": False},
@@ -70,14 +71,15 @@
 class TestRandSpatialCropSamples(unittest.TestCase):
     @parameterized.expand([TEST_CASE_1, TEST_CASE_2])
     def test_shape(self, input_param, input_data, expected_shape, expected_last_item):
-        xform = RandSpatialCropSamples(**input_param)
-        xform.set_random_state(1234)
-        result = xform(input_data)
+        for p in TEST_NDARRAYS:
+            xform = RandSpatialCropSamples(**input_param)
+            xform.set_random_state(1234)
+            result = xform(p(input_data))
 
-        np.testing.assert_equal(len(result), input_param["num_samples"])
-        for item, expected in zip(result, expected_shape):
-            self.assertTupleEqual(item.shape, expected)
-        np.testing.assert_allclose(result[-1], expected_last_item)
+            np.testing.assert_equal(len(result), input_param["num_samples"])
+            for item, expected in zip(result, expected_shape):
+                self.assertTupleEqual(item.shape, expected)
+            assert_allclose(result[-1], expected_last_item, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_spatial_crop_samplesd.py b/tests/test_rand_spatial_crop_samplesd.py
index 3f5eee7b27..4b41ce3344 100644
--- a/tests/test_rand_spatial_crop_samplesd.py
+++ b/tests/test_rand_spatial_crop_samplesd.py
@@ -15,6 +15,7 @@
 from parameterized import parameterized
 
 from monai.transforms import Compose, RandSpatialCropSamplesd, ToTensord
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
 TEST_CASE_1 = [
     {"keys": ["img", "seg"], "num_samples": 4, "roi_size": [2, 2, 2], "random_center": True},
@@ -38,31 +39,48 @@
     },
 ]
 
-TEST_CASE_2 = [
-    {"keys": ["img", "seg"], "num_samples": 8, "roi_size": [2, 2, 3], "random_center": False},
-    {"img": np.arange(81).reshape(3, 3, 3, 3), "seg": np.arange(81, 0, -1).reshape(3, 3, 3, 3)},
-    [(3, 3, 3, 3), (3, 2, 3, 3), (3, 2, 2, 3), (3, 2, 3, 3), (3, 3, 3, 3), (3, 3, 3, 3), (3, 2, 2, 3), (3, 3, 2, 3)],
-    {
-        "img": np.array(
+TEST_CASE_2 = []
+for p in TEST_NDARRAYS:
+    TEST_CASE_2.append(
+        [
+            {"keys": ["img", "seg"], "num_samples": 8, "roi_size": [2, 2, 3], "random_center": False},
+            {"img": p(np.arange(81).reshape(3, 3, 3, 3)), "seg": p(np.arange(81, 0, -1).reshape(3, 3, 3, 3))},
             [
-                [[[0, 1, 2], [3, 4, 5]], [[9, 10, 11], [12, 13, 14]], [[18, 19, 20], [21, 22, 23]]],
-                [[[27, 28, 29], [30, 31, 32]], [[36, 37, 38], [39, 40, 41]], [[45, 46, 47], [48, 49, 50]]],
-                [[[54, 55, 56], [57, 58, 59]], [[63, 64, 65], [66, 67, 68]], [[72, 73, 74], [75, 76, 77]]],
-            ]
-        ),
-        "seg": np.array(
-            [
-                [[[81, 80, 79], [78, 77, 76]], [[72, 71, 70], [69, 68, 67]], [[63, 62, 61], [60, 59, 58]]],
-                [[[54, 53, 52], [51, 50, 49]], [[45, 44, 43], [42, 41, 40]], [[36, 35, 34], [33, 32, 31]]],
-                [[[27, 26, 25], [24, 23, 22]], [[18, 17, 16], [15, 14, 13]], [[9, 8, 7], [6, 5, 4]]],
-            ]
-        ),
-    },
-]
+                (3, 3, 3, 3),
+                (3, 2, 3, 3),
+                (3, 2, 2, 3),
+                (3, 2, 3, 3),
+                (3, 3, 3, 3),
+                (3, 3, 3, 3),
+                (3, 2, 2, 3),
+                (3, 3, 2, 3),
+            ],
+            {
+                "img": p(
+                    np.array(
+                        [
+                            [[[0, 1, 2], [3, 4, 5]], [[9, 10, 11], [12, 13, 14]], [[18, 19, 20], [21, 22, 23]]],
+                            [[[27, 28, 29], [30, 31, 32]], [[36, 37, 38], [39, 40, 41]], [[45, 46, 47], [48, 49, 50]]],
+                            [[[54, 55, 56], [57, 58, 59]], [[63, 64, 65], [66, 67, 68]], [[72, 73, 74], [75, 76, 77]]],
+                        ]
+                    )
+                ),
+                "seg": p(
+                    np.array(
+                        [
+                            [[[81, 80, 79], [78, 77, 76]], [[72, 71, 70], [69, 68, 67]], [[63, 62, 61], [60, 59, 58]]],
+                            [[[54, 53, 52], [51, 50, 49]], [[45, 44, 43], [42, 41, 40]], [[36, 35, 34], [33, 32, 31]]],
+                            [[[27, 26, 25], [24, 23, 22]], [[18, 17, 16], [15, 14, 13]], [[9, 8, 7], [6, 5, 4]]],
+                        ]
+                    )
+                ),
+            },
+        ]
+    )
 
 
 class TestRandSpatialCropSamplesd(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2])
+    @parameterized.expand([TEST_CASE_1, *TEST_CASE_2])
     def test_shape(self, input_param, input_data, expected_shape, expected_last):
         xform = RandSpatialCropSamplesd(**input_param)
         xform.set_random_state(1234)
@@ -73,8 +91,8 @@ def test_shape(self, input_param, input_data, expected_shape, expected_last):
         for i, item in enumerate(result):
             self.assertEqual(item["img_meta_dict"]["patch_index"], i)
             self.assertEqual(item["seg_meta_dict"]["patch_index"], i)
-        np.testing.assert_allclose(item["img"], expected_last["img"])
-        np.testing.assert_allclose(item["seg"], expected_last["seg"])
+        assert_allclose(item["img"], expected_last["img"], type_test=True)
+        assert_allclose(item["seg"], expected_last["seg"], type_test=True)
 
     def test_deep_copy(self):
         data = {"img": np.ones((1, 10, 11, 12))}
diff --git a/tests/test_rand_spatial_cropd.py b/tests/test_rand_spatial_cropd.py
index 610c1974aa..edcb61dc99 100644
--- a/tests/test_rand_spatial_cropd.py
+++ b/tests/test_rand_spatial_cropd.py
@@ -15,6 +15,7 @@
 from parameterized import parameterized
 
 from monai.transforms import RandSpatialCropd
+from tests.utils import TEST_NDARRAYS
 
 TEST_CASE_0 = [
     {"keys": "img", "roi_size": [3, 3, -1], "random_center": True},
@@ -67,10 +68,12 @@ def test_value(self, input_param, input_data):
 
     @parameterized.expand([TEST_CASE_4, TEST_CASE_5])
     def test_random_shape(self, input_param, input_data, expected_shape):
-        cropper = RandSpatialCropd(**input_param)
-        cropper.set_random_state(seed=123)
-        result = cropper(input_data)
-        self.assertTupleEqual(result["img"].shape, expected_shape)
+        for p in TEST_NDARRAYS:
+            cropper = RandSpatialCropd(**input_param)
+            cropper.set_random_state(seed=123)
+            input_data["img"] = p(input_data["img"])
+            result = cropper(input_data)
+            self.assertTupleEqual(result["img"].shape, expected_shape)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_zoom.py b/tests/test_rand_zoom.py
index c21bc8b9e9..6ccb265cca 100644
--- a/tests/test_rand_zoom.py
+++ b/tests/test_rand_zoom.py
@@ -17,7 +17,7 @@
 
 from monai.transforms import RandZoom
 from monai.utils import GridSampleMode, InterpolateMode
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 VALID_CASES = [(0.8, 1.2, "nearest", False), (0.8, 1.2, InterpolateMode.NEAREST, False)]
 
@@ -25,36 +25,34 @@
 class TestRandZoom(NumpyImageTestCase2D):
     @parameterized.expand(VALID_CASES)
     def test_correct_results(self, min_zoom, max_zoom, mode, keep_size):
-        random_zoom = RandZoom(
-            prob=1.0,
-            min_zoom=min_zoom,
-            max_zoom=max_zoom,
-            mode=mode,
-            keep_size=keep_size,
-        )
-        random_zoom.set_random_state(1234)
-        zoomed = random_zoom(self.imt[0])
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(zoom_scipy(channel, zoom=random_zoom._zoom, mode="nearest", order=0, prefilter=False))
-        expected = np.stack(expected).astype(np.float32)
-        np.testing.assert_allclose(zoomed, expected, atol=1.0)
+        for p in TEST_NDARRAYS:
+            random_zoom = RandZoom(
+                prob=1.0,
+                min_zoom=min_zoom,
+                max_zoom=max_zoom,
+                mode=mode,
+                keep_size=keep_size,
+            )
+            random_zoom.set_random_state(1234)
+            zoomed = random_zoom(p(self.imt[0]))
+            expected = [
+                zoom_scipy(channel, zoom=random_zoom._zoom, mode="nearest", order=0, prefilter=False)
+                for channel in self.imt[0]
+            ]
+
+            expected = np.stack(expected).astype(np.float32)
+            assert_allclose(zoomed, p(expected), atol=1.0)
 
     def test_keep_size(self):
-        random_zoom = RandZoom(
-            prob=1.0,
-            min_zoom=0.6,
-            max_zoom=0.7,
-            keep_size=True,
-            padding_mode="constant",
-            constant_values=2,
-        )
-        zoomed = random_zoom(self.imt[0])
-        self.assertTrue(np.array_equal(zoomed.shape, self.imt.shape[1:]))
-        zoomed = random_zoom(self.imt[0])
-        self.assertTrue(np.array_equal(zoomed.shape, self.imt.shape[1:]))
-        zoomed = random_zoom(self.imt[0])
-        self.assertTrue(np.array_equal(zoomed.shape, self.imt.shape[1:]))
+        for p in TEST_NDARRAYS:
+            im = p(self.imt[0])
+            random_zoom = RandZoom(prob=1.0, min_zoom=0.6, max_zoom=0.7, keep_size=True)
+            zoomed = random_zoom(im)
+            self.assertTrue(np.array_equal(zoomed.shape, self.imt.shape[1:]))
+            zoomed = random_zoom(im)
+            self.assertTrue(np.array_equal(zoomed.shape, self.imt.shape[1:]))
+            zoomed = random_zoom(im)
+            self.assertTrue(np.array_equal(zoomed.shape, self.imt.shape[1:]))
 
     @parameterized.expand(
         [
@@ -64,23 +62,25 @@ def test_keep_size(self):
         ]
     )
     def test_invalid_inputs(self, _, min_zoom, max_zoom, mode, raises):
-        with self.assertRaises(raises):
-            random_zoom = RandZoom(prob=1.0, min_zoom=min_zoom, max_zoom=max_zoom, mode=mode)
-            random_zoom(self.imt[0])
+        for p in TEST_NDARRAYS:
+            with self.assertRaises(raises):
+                random_zoom = RandZoom(prob=1.0, min_zoom=min_zoom, max_zoom=max_zoom, mode=mode)
+                random_zoom(p(self.imt[0]))
 
     def test_auto_expand_3d(self):
-        random_zoom = RandZoom(
-            prob=1.0,
-            min_zoom=[0.8, 0.7],
-            max_zoom=[1.2, 1.3],
-            mode="nearest",
-            keep_size=False,
-        )
-        random_zoom.set_random_state(1234)
-        test_data = np.random.randint(0, 2, size=[2, 2, 3, 4])
-        zoomed = random_zoom(test_data)
-        np.testing.assert_allclose(random_zoom._zoom, (1.048844, 1.048844, 0.962637), atol=1e-2)
-        np.testing.assert_allclose(zoomed.shape, (2, 2, 3, 3))
+        for p in TEST_NDARRAYS:
+            random_zoom = RandZoom(
+                prob=1.0,
+                min_zoom=[0.8, 0.7],
+                max_zoom=[1.2, 1.3],
+                mode="nearest",
+                keep_size=False,
+            )
+            random_zoom.set_random_state(1234)
+            test_data = p(np.random.randint(0, 2, size=[2, 2, 3, 4]))
+            zoomed = random_zoom(test_data)
+            assert_allclose(random_zoom._zoom, (1.048844, 1.048844, 0.962637), atol=1e-2)
+            assert_allclose(zoomed.shape, (2, 2, 3, 3))
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rand_zoomd.py b/tests/test_rand_zoomd.py
index 4ccb1aad64..842d207ca6 100644
--- a/tests/test_rand_zoomd.py
+++ b/tests/test_rand_zoomd.py
@@ -16,7 +16,7 @@
 from scipy.ndimage import zoom as zoom_scipy
 
 from monai.transforms import RandZoomd
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 VALID_CASES = [(0.8, 1.2, "nearest", None, False)]
 
@@ -34,14 +34,17 @@ def test_correct_results(self, min_zoom, max_zoom, mode, align_corners, keep_siz
             align_corners=align_corners,
             keep_size=keep_size,
         )
-        random_zoom.set_random_state(1234)
+        for p in TEST_NDARRAYS:
+            random_zoom.set_random_state(1234)
 
-        zoomed = random_zoom({key: self.imt[0]})
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(zoom_scipy(channel, zoom=random_zoom._zoom, mode="nearest", order=0, prefilter=False))
-        expected = np.stack(expected).astype(np.float32)
-        np.testing.assert_allclose(expected, zoomed[key], atol=1.0)
+            zoomed = random_zoom({key: p(self.imt[0])})
+            expected = [
+                zoom_scipy(channel, zoom=random_zoom._zoom, mode="nearest", order=0, prefilter=False)
+                for channel in self.imt[0]
+            ]
+
+            expected = np.stack(expected).astype(np.float32)
+            assert_allclose(zoomed[key], p(expected), atol=1.0)
 
     def test_keep_size(self):
         key = "img"
@@ -54,17 +57,19 @@ def test_keep_size(self):
             padding_mode="constant",
             constant_values=2,
         )
-        zoomed = random_zoom({key: self.imt[0]})
-        self.assertTrue(np.array_equal(zoomed[key].shape, self.imt.shape[1:]))
+        for p in TEST_NDARRAYS:
+            zoomed = random_zoom({key: p(self.imt[0])})
+            np.testing.assert_array_equal(zoomed[key].shape, self.imt.shape[1:])
 
     @parameterized.expand(
         [("no_min_zoom", None, 1.1, "bilinear", TypeError), ("invalid_order", 0.9, 1.1, "s", ValueError)]
     )
     def test_invalid_inputs(self, _, min_zoom, max_zoom, mode, raises):
         key = "img"
-        with self.assertRaises(raises):
-            random_zoom = RandZoomd(key, prob=1.0, min_zoom=min_zoom, max_zoom=max_zoom, mode=mode)
-            random_zoom({key: self.imt[0]})
+        for p in TEST_NDARRAYS:
+            with self.assertRaises(raises):
+                random_zoom = RandZoomd(key, prob=1.0, min_zoom=min_zoom, max_zoom=max_zoom, mode=mode)
+                random_zoom({key: p(self.imt[0])})
 
     def test_auto_expand_3d(self):
         random_zoom = RandZoomd(
@@ -75,11 +80,12 @@ def test_auto_expand_3d(self):
             mode="nearest",
             keep_size=False,
         )
-        random_zoom.set_random_state(1234)
-        test_data = {"img": np.random.randint(0, 2, size=[2, 2, 3, 4])}
-        zoomed = random_zoom(test_data)
-        np.testing.assert_allclose(random_zoom._zoom, (1.048844, 1.048844, 0.962637), atol=1e-2)
-        np.testing.assert_allclose(zoomed["img"].shape, (2, 2, 3, 3))
+        for p in TEST_NDARRAYS:
+            random_zoom.set_random_state(1234)
+            test_data = {"img": p(np.random.randint(0, 2, size=[2, 2, 3, 4]))}
+            zoomed = random_zoom(test_data)
+            assert_allclose(random_zoom._zoom, (1.048844, 1.048844, 0.962637), atol=1e-2)
+            assert_allclose(zoomed["img"].shape, (2, 2, 3, 3))
 
 
 if __name__ == "__main__":
diff --git a/tests/test_reg_loss_integration.py b/tests/test_reg_loss_integration.py
index b864a64647..1578aa4888 100644
--- a/tests/test_reg_loss_integration.py
+++ b/tests/test_reg_loss_integration.py
@@ -17,6 +17,7 @@
 from parameterized import parameterized
 
 from monai.losses import BendingEnergyLoss, GlobalMutualInformationLoss, LocalNormalizedCrossCorrelationLoss
+from tests.utils import SkipIfBeforePyTorchVersion
 
 TEST_CASES = [
     [BendingEnergyLoss, {}, ["pred"]],
@@ -36,6 +37,7 @@
         ["pred", "target"],
     ],
     [GlobalMutualInformationLoss, {"num_bins": 10}, ["pred", "target"]],
+    [GlobalMutualInformationLoss, {"kernel_type": "b-spline", "num_bins": 10}, ["pred", "target"]],
 ]
 
 
@@ -51,6 +53,7 @@ def tearDown(self):
         torch.backends.cudnn.benchmark = True
 
     @parameterized.expand(TEST_CASES)
+    @SkipIfBeforePyTorchVersion((1, 9))
     def test_convergence(self, loss_type, loss_args, forward_args):
         """
         The goal of this test is to assess if the gradient of the loss function
@@ -69,7 +72,7 @@ def test_convergence(self, loss_type, loss_args, forward_args):
         # define a one layer model
         class OnelayerNet(nn.Module):
             def __init__(self):
-                super(OnelayerNet, self).__init__()
+                super().__init__()
                 self.layer = nn.Sequential(
                     nn.Conv3d(in_channels=1, out_channels=1, kernel_size=3, padding=1),
                     nn.ReLU(),
diff --git a/tests/test_resampler.py b/tests/test_resampler.py
index 2be94acebd..af23421ecc 100644
--- a/tests/test_resampler.py
+++ b/tests/test_resampler.py
@@ -17,69 +17,146 @@
 
 from monai.transforms import Resample
 from monai.transforms.utils import create_grid
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASES = [
-    [
-        dict(padding_mode="zeros", as_tensor_output=False, device=None),
-        {"grid": create_grid((2, 2)), "img": np.arange(4).reshape((1, 2, 2))},
-        np.array([[[0.0, 1.0], [2.0, 3.0]]]),
-    ],
-    [
-        dict(padding_mode="zeros", as_tensor_output=False, device=None),
-        {"grid": create_grid((4, 4)), "img": np.arange(4).reshape((1, 2, 2))},
-        np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]]]),
-    ],
-    [
-        dict(padding_mode="border", as_tensor_output=False, device=None),
-        {"grid": create_grid((4, 4)), "img": np.arange(4).reshape((1, 2, 2))},
-        np.array([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0], [2.0, 2.0, 3, 3.0], [2.0, 2.0, 3.0, 3.0]]]),
-    ],
-    [
-        dict(padding_mode="reflection", as_tensor_output=False, device=None),
-        {"grid": create_grid((4, 4)), "img": np.arange(4).reshape((1, 2, 2)), "mode": "nearest"},
-        np.array([[[3.0, 2.0, 3.0, 2.0], [1.0, 0.0, 1.0, 0.0], [3.0, 2.0, 3.0, 2.0], [1.0, 0.0, 1.0, 0.0]]]),
-    ],
-    [
-        dict(padding_mode="zeros", as_tensor_output=False, device=None),
-        {"grid": create_grid((4, 4, 4)), "img": np.arange(8).reshape((1, 2, 2, 2)), "mode": "bilinear"},
-        np.array(
-            [
+TESTS = []
+for p in TEST_NDARRAYS:
+    for q in TEST_NDARRAYS:
+        for device in [None, "cpu", "cuda"] if torch.cuda.is_available() else [None, "cpu"]:
+            TESTS.append(
                 [
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 4.0, 5.0, 0.0], [0.0, 6.0, 7.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
-                    [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]],
+                    dict(padding_mode="zeros", device=device),
+                    {"grid": p(create_grid((2, 2))), "img": q(np.arange(4).reshape((1, 2, 2)))},
+                    q(np.array([[[0.0, 1.0], [2.0, 3.0]]])),
                 ]
-            ]
-        ),
-    ],
-    [
-        dict(padding_mode="border", as_tensor_output=False, device=None),
-        {"grid": create_grid((4, 4, 4)), "img": np.arange(8).reshape((1, 2, 2, 2)), "mode": "bilinear"},
-        np.array(
-            [
+            )
+            TESTS.append(
                 [
-                    [[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0], [2.0, 2.0, 3.0, 3.0], [2.0, 2.0, 3.0, 3.0]],
-                    [[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0], [2.0, 2.0, 3.0, 3.0], [2.0, 2.0, 3.0, 3.0]],
-                    [[4.0, 4.0, 5.0, 5.0], [4.0, 4.0, 5.0, 5.0], [6.0, 6.0, 7.0, 7.0], [6.0, 6.0, 7.0, 7.0]],
-                    [[4.0, 4.0, 5.0, 5.0], [4.0, 4.0, 5.0, 5.0], [6.0, 6.0, 7.0, 7.0], [6.0, 6.0, 7.0, 7.0]],
+                    dict(padding_mode="zeros", device=device),
+                    {"grid": p(create_grid((4, 4))), "img": q(np.arange(4).reshape((1, 2, 2)))},
+                    q(
+                        np.array(
+                            [[[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 1.0, 0.0], [0.0, 2.0, 3.0, 0.0], [0.0, 0.0, 0.0, 0.0]]]
+                        )
+                    ),
                 ]
-            ]
-        ),
-    ],
-]
+            )
+            TESTS.append(
+                [
+                    dict(padding_mode="border", device=device),
+                    {"grid": p(create_grid((4, 4))), "img": q(np.arange(4).reshape((1, 2, 2)))},
+                    q(
+                        np.array(
+                            [[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0], [2.0, 2.0, 3, 3.0], [2.0, 2.0, 3.0, 3.0]]]
+                        )
+                    ),
+                ]
+            )
+            TESTS.append(
+                [
+                    dict(padding_mode="reflection", device=device),
+                    {"grid": p(create_grid((4, 4))), "img": q(np.arange(4).reshape((1, 2, 2))), "mode": "nearest"},
+                    q(
+                        np.array(
+                            [[[3.0, 2.0, 3.0, 2.0], [1.0, 0.0, 1.0, 0.0], [3.0, 2.0, 3.0, 2.0], [1.0, 0.0, 1.0, 0.0]]]
+                        )
+                    ),
+                ]
+            )
+            TESTS.append(
+                [
+                    dict(padding_mode="zeros", device=device),
+                    {
+                        "grid": p(create_grid((4, 4, 4))),
+                        "img": q(np.arange(8).reshape((1, 2, 2, 2))),
+                        "mode": "bilinear",
+                    },
+                    q(
+                        np.array(
+                            [
+                                [
+                                    [
+                                        [0.0, 0.0, 0.0, 0.0],
+                                        [0.0, 0.0, 0.0, 0.0],
+                                        [0.0, 0.0, 0.0, 0.0],
+                                        [0.0, 0.0, 0.0, 0.0],
+                                    ],
+                                    [
+                                        [0.0, 0.0, 0.0, 0.0],
+                                        [0.0, 0.0, 1.0, 0.0],
+                                        [0.0, 2.0, 3.0, 0.0],
+                                        [0.0, 0.0, 0.0, 0.0],
+                                    ],
+                                    [
+                                        [0.0, 0.0, 0.0, 0.0],
+                                        [0.0, 4.0, 5.0, 0.0],
+                                        [0.0, 6.0, 7.0, 0.0],
+                                        [0.0, 0.0, 0.0, 0.0],
+                                    ],
+                                    [
+                                        [0.0, 0.0, 0.0, 0.0],
+                                        [0.0, 0.0, 0.0, 0.0],
+                                        [0.0, 0.0, 0.0, 0.0],
+                                        [0.0, 0.0, 0.0, 0.0],
+                                    ],
+                                ]
+                            ]
+                        )
+                    ),
+                ]
+            )
+            TESTS.append(
+                [
+                    dict(padding_mode="border", device=device),
+                    {
+                        "grid": p(create_grid((4, 4, 4))),
+                        "img": q(np.arange(8).reshape((1, 2, 2, 2))),
+                        "mode": "bilinear",
+                    },
+                    q(
+                        np.array(
+                            [
+                                [
+                                    [
+                                        [0.0, 0.0, 1.0, 1.0],
+                                        [0.0, 0.0, 1.0, 1.0],
+                                        [2.0, 2.0, 3.0, 3.0],
+                                        [2.0, 2.0, 3.0, 3.0],
+                                    ],
+                                    [
+                                        [0.0, 0.0, 1.0, 1.0],
+                                        [0.0, 0.0, 1.0, 1.0],
+                                        [2.0, 2.0, 3.0, 3.0],
+                                        [2.0, 2.0, 3.0, 3.0],
+                                    ],
+                                    [
+                                        [4.0, 4.0, 5.0, 5.0],
+                                        [4.0, 4.0, 5.0, 5.0],
+                                        [6.0, 6.0, 7.0, 7.0],
+                                        [6.0, 6.0, 7.0, 7.0],
+                                    ],
+                                    [
+                                        [4.0, 4.0, 5.0, 5.0],
+                                        [4.0, 4.0, 5.0, 5.0],
+                                        [6.0, 6.0, 7.0, 7.0],
+                                        [6.0, 6.0, 7.0, 7.0],
+                                    ],
+                                ]
+                            ]
+                        )
+                    ),
+                ]
+            )
 
 
 class TestResample(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_resample(self, input_param, input_data, expected_val):
         g = Resample(**input_param)
         result = g(**input_data)
-        self.assertEqual(isinstance(result, torch.Tensor), isinstance(expected_val, torch.Tensor))
-        if isinstance(result, torch.Tensor):
-            np.testing.assert_allclose(result.cpu().numpy(), expected_val.cpu().numpy(), rtol=1e-4, atol=1e-4)
-        else:
-            np.testing.assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4)
+        if "device" in input_data:
+            self.assertEqual(result.device, input_data["device"])
+        assert_allclose(result, expected_val, rtol=1e-4, atol=1e-4)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_resize.py b/tests/test_resize.py
index e5ec5dd1a9..f6c4a8b14b 100644
--- a/tests/test_resize.py
+++ b/tests/test_resize.py
@@ -16,7 +16,7 @@
 from parameterized import parameterized
 
 from monai.transforms import Resize
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 TEST_CASE_0 = [{"spatial_size": 15}, (6, 10, 15)]
 
@@ -45,16 +45,22 @@ def test_correct_results(self, spatial_size, mode):
             _order = 1
         if spatial_size == (32, -1):
             spatial_size = (32, 64)
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(
-                skimage.transform.resize(
-                    channel, spatial_size, order=_order, clip=False, preserve_range=False, anti_aliasing=False
-                )
+        expected = [
+            skimage.transform.resize(
+                channel,
+                spatial_size,
+                order=_order,
+                clip=False,
+                preserve_range=False,
+                anti_aliasing=False,
             )
+            for channel in self.imt[0]
+        ]
+
         expected = np.stack(expected).astype(np.float32)
-        out = resize(self.imt[0])
-        np.testing.assert_allclose(out, expected, atol=0.9)
+        for p in TEST_NDARRAYS:
+            out = resize(p(self.imt[0]))
+            assert_allclose(out, expected, type_test=False, atol=0.9)
 
     @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2])
     def test_longest_shape(self, input_param, expected_shape):
diff --git a/tests/test_resize_with_pad_or_crop.py b/tests/test_resize_with_pad_or_crop.py
index 46f1fc86cc..2162a0bb1b 100644
--- a/tests/test_resize_with_pad_or_crop.py
+++ b/tests/test_resize_with_pad_or_crop.py
@@ -12,9 +12,11 @@
 import unittest
 
 import numpy as np
+import torch
 from parameterized import parameterized
 
 from monai.transforms import ResizeWithPadOrCrop
+from tests.utils import TEST_NDARRAYS
 
 TEST_CASES = [
     [
@@ -48,11 +50,16 @@
 class TestResizeWithPadOrCrop(unittest.TestCase):
     @parameterized.expand(TEST_CASES)
     def test_pad_shape(self, input_param, input_shape, expected_shape):
-        paddcroper = ResizeWithPadOrCrop(**input_param)
-        result = paddcroper(np.zeros(input_shape))
-        np.testing.assert_allclose(result.shape, expected_shape)
-        result = paddcroper(np.zeros(input_shape), mode="constant")
-        np.testing.assert_allclose(result.shape, expected_shape)
+        for p in TEST_NDARRAYS:
+            if isinstance(p(0), torch.Tensor) and (
+                "constant_values" in input_param or input_param["mode"] == "reflect"
+            ):
+                continue
+            paddcroper = ResizeWithPadOrCrop(**input_param)
+            result = paddcroper(p(np.zeros(input_shape)))
+            np.testing.assert_allclose(result.shape, expected_shape)
+            result = paddcroper(p(np.zeros(input_shape)), mode="constant")
+            np.testing.assert_allclose(result.shape, expected_shape)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_resize_with_pad_or_cropd.py b/tests/test_resize_with_pad_or_cropd.py
index 32a62a9e16..58f6c92a8f 100644
--- a/tests/test_resize_with_pad_or_cropd.py
+++ b/tests/test_resize_with_pad_or_cropd.py
@@ -12,9 +12,11 @@
 import unittest
 
 import numpy as np
+import torch
 from parameterized import parameterized
 
 from monai.transforms import ResizeWithPadOrCropd
+from tests.utils import TEST_NDARRAYS
 
 TEST_CASES = [
     [
@@ -48,9 +50,15 @@
 class TestResizeWithPadOrCropd(unittest.TestCase):
     @parameterized.expand(TEST_CASES)
     def test_pad_shape(self, input_param, input_data, expected_val):
-        paddcroper = ResizeWithPadOrCropd(**input_param)
-        result = paddcroper(input_data)
-        np.testing.assert_allclose(result["img"].shape, expected_val)
+        for p in TEST_NDARRAYS:
+            if isinstance(p(0), torch.Tensor) and (
+                "constant_values" in input_param or input_param["mode"] == "reflect"
+            ):
+                continue
+            paddcroper = ResizeWithPadOrCropd(**input_param)
+            input_data["img"] = p(input_data["img"])
+            result = paddcroper(input_data)
+            np.testing.assert_allclose(result["img"].shape, expected_val)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_resized.py b/tests/test_resized.py
index 930faf00eb..47b8e8a704 100644
--- a/tests/test_resized.py
+++ b/tests/test_resized.py
@@ -16,7 +16,7 @@
 from parameterized import parameterized
 
 from monai.transforms import Resized
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 TEST_CASE_0 = [{"keys": "img", "spatial_size": 15}, (6, 10, 15)]
 
@@ -48,16 +48,22 @@ def test_correct_results(self, spatial_size, mode):
             _order = 1
         if spatial_size == (32, -1):
             spatial_size = (32, 64)
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(
-                skimage.transform.resize(
-                    channel, spatial_size, order=_order, clip=False, preserve_range=False, anti_aliasing=False
-                )
+        expected = [
+            skimage.transform.resize(
+                channel,
+                spatial_size,
+                order=_order,
+                clip=False,
+                preserve_range=False,
+                anti_aliasing=False,
             )
+            for channel in self.imt[0]
+        ]
+
         expected = np.stack(expected).astype(np.float32)
-        out = resize({"img": self.imt[0]})["img"]
-        np.testing.assert_allclose(out, expected, atol=0.9)
+        for p in TEST_NDARRAYS:
+            out = resize({"img": p(self.imt[0])})["img"]
+            assert_allclose(out, expected, type_test=False, atol=0.9)
 
     @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
     def test_longest_shape(self, input_param, expected_shape):
diff --git a/tests/test_resnet.py b/tests/test_resnet.py
index c4ba5c2e16..16cd6f4865 100644
--- a/tests/test_resnet.py
+++ b/tests/test_resnet.py
@@ -42,14 +42,26 @@
     (2, 3),
 ]
 
+TEST_CASE_2_A = [  # 2D, batch 2, 1 input channel, shortcut type A
+    {"pretrained": False, "spatial_dims": 2, "n_input_channels": 1, "num_classes": 3, "shortcut_type": "A"},
+    (2, 1, 32, 64),
+    (2, 3),
+]
+
 TEST_CASE_3 = [  # 1D, batch 1, 2 input channels
     {"pretrained": False, "spatial_dims": 1, "n_input_channels": 2, "num_classes": 3},
     (1, 2, 32),
     (1, 3),
 ]
 
+TEST_CASE_3_A = [  # 1D, batch 1, 2 input channels
+    {"pretrained": False, "spatial_dims": 1, "n_input_channels": 2, "num_classes": 3, "shortcut_type": "A"},
+    (1, 2, 32),
+    (1, 3),
+]
+
 TEST_CASES = []
-for case in [TEST_CASE_1, TEST_CASE_2, TEST_CASE_3]:
+for case in [TEST_CASE_1, TEST_CASE_2, TEST_CASE_3, TEST_CASE_2_A, TEST_CASE_3_A]:
     for model in [resnet10, resnet18, resnet34, resnet50, resnet101, resnet152, resnet200]:
         TEST_CASES.append([model, *case])
 
diff --git a/tests/test_rotate.py b/tests/test_rotate.py
index 436c952d4b..16a9c6d124 100644
--- a/tests/test_rotate.py
+++ b/tests/test_rotate.py
@@ -10,42 +10,44 @@
 # limitations under the License.
 
 import unittest
+from typing import List, Tuple
 
 import numpy as np
 import scipy.ndimage
+import torch
 from parameterized import parameterized
 
 from monai.transforms import Rotate
-from tests.utils import NumpyImageTestCase2D, NumpyImageTestCase3D
-
-TEST_CASES_2D = [
-    (np.pi / 6, False, "bilinear", "border", False),
-    (np.pi / 4, True, "bilinear", "border", False),
-    (-np.pi / 4.5, True, "nearest", "reflection", False),
-    (np.pi, False, "nearest", "zeros", False),
-    (-np.pi / 2, False, "bilinear", "zeros", True),
-]
-
-TEST_CASES_3D = [
-    (-np.pi / 2, True, "nearest", "border", False),
-    (np.pi / 4, True, "bilinear", "border", False),
-    (-np.pi / 4.5, True, "nearest", "reflection", False),
-    (np.pi, False, "nearest", "zeros", False),
-    (-np.pi / 2, False, "bilinear", "zeros", False),
-]
-
-TEST_CASES_SHAPE_3D = [
-    ([-np.pi / 2, 1.0, 2.0], "nearest", "border", False),
-    ([np.pi / 4, 0, 0], "bilinear", "border", False),
-    ([-np.pi / 4.5, -20, 20], "nearest", "reflection", False),
-]
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, NumpyImageTestCase3D
+
+TEST_CASES_2D: List[Tuple] = []
+for p in TEST_NDARRAYS:
+    TEST_CASES_2D.append((p, np.pi / 6, False, "bilinear", "border", False))
+    TEST_CASES_2D.append((p, np.pi / 4, True, "bilinear", "border", False))
+    TEST_CASES_2D.append((p, -np.pi / 4.5, True, "nearest", "reflection", False))
+    TEST_CASES_2D.append((p, np.pi, False, "nearest", "zeros", False))
+    TEST_CASES_2D.append((p, -np.pi / 2, False, "bilinear", "zeros", True))
+
+TEST_CASES_3D: List[Tuple] = []
+for p in TEST_NDARRAYS:
+    TEST_CASES_3D.append((p, -np.pi / 2, True, "nearest", "border", False))
+    TEST_CASES_3D.append((p, np.pi / 4, True, "bilinear", "border", False))
+    TEST_CASES_3D.append((p, -np.pi / 4.5, True, "nearest", "reflection", False))
+    TEST_CASES_3D.append((p, np.pi, False, "nearest", "zeros", False))
+    TEST_CASES_3D.append((p, -np.pi / 2, False, "bilinear", "zeros", False))
+
+TEST_CASES_SHAPE_3D: List[Tuple] = []
+for p in TEST_NDARRAYS:
+    TEST_CASES_SHAPE_3D.append((p, [-np.pi / 2, 1.0, 2.0], "nearest", "border", False))
+    TEST_CASES_SHAPE_3D.append((p, [np.pi / 4, 0, 0], "bilinear", "border", False))
+    TEST_CASES_SHAPE_3D.append((p, [-np.pi / 4.5, -20, 20], "nearest", "reflection", False))
 
 
 class TestRotate2D(NumpyImageTestCase2D):
     @parameterized.expand(TEST_CASES_2D)
-    def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corners):
+    def test_correct_results(self, im_type, angle, keep_size, mode, padding_mode, align_corners):
         rotate_fn = Rotate(angle, keep_size, mode, padding_mode, align_corners)
-        rotated = rotate_fn(self.imt[0])
+        rotated = rotate_fn(im_type(self.imt[0]))
         if keep_size:
             np.testing.assert_allclose(self.imt[0].shape, rotated.shape)
         _order = 0 if mode == "nearest" else 1
@@ -70,15 +72,16 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne
                 )
             )
         expected = np.stack(expected).astype(np.float32)
+        rotated = rotated.cpu() if isinstance(rotated, torch.Tensor) else rotated
         good = np.sum(np.isclose(expected, rotated, atol=1e-3))
         self.assertLessEqual(np.abs(good - expected.size), 5, "diff at most 5 pixels")
 
 
 class TestRotate3D(NumpyImageTestCase3D):
     @parameterized.expand(TEST_CASES_3D)
-    def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corners):
+    def test_correct_results(self, im_type, angle, keep_size, mode, padding_mode, align_corners):
         rotate_fn = Rotate([angle, 0, 0], keep_size, mode, padding_mode, align_corners)
-        rotated = rotate_fn(self.imt[0])
+        rotated = rotate_fn(im_type(self.imt[0]))
         if keep_size:
             np.testing.assert_allclose(self.imt[0].shape, rotated.shape)
         _order = 0 if mode == "nearest" else 1
@@ -103,23 +106,25 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne
                 )
             )
         expected = np.stack(expected).astype(np.float32)
+        rotated = rotated.cpu() if isinstance(rotated, torch.Tensor) else rotated
         n_good = np.sum(np.isclose(expected, rotated, atol=1e-3))
         self.assertLessEqual(expected.size - n_good, 5, "diff at most 5 pixels")
 
     @parameterized.expand(TEST_CASES_SHAPE_3D)
-    def test_correct_shape(self, angle, mode, padding_mode, align_corners):
+    def test_correct_shape(self, im_type, angle, mode, padding_mode, align_corners):
         rotate_fn = Rotate(angle, True, align_corners=align_corners)
-        rotated = rotate_fn(self.imt[0], mode=mode, padding_mode=padding_mode)
+        rotated = rotate_fn(im_type(self.imt[0]), mode=mode, padding_mode=padding_mode)
         np.testing.assert_allclose(self.imt[0].shape, rotated.shape)
 
     def test_ill_case(self):
-        rotate_fn = Rotate(10, True)
-        with self.assertRaises(ValueError):  # wrong shape
-            rotate_fn(self.imt)
-
-        rotate_fn = Rotate(10, keep_size=False)
-        with self.assertRaises(ValueError):  # wrong mode
-            rotate_fn(self.imt[0], mode="trilinear")
+        for p in TEST_NDARRAYS:
+            rotate_fn = Rotate(10, True)
+            with self.assertRaises(ValueError):  # wrong shape
+                rotate_fn(p(self.imt))
+
+            rotate_fn = Rotate(10, keep_size=False)
+            with self.assertRaises(ValueError):  # wrong mode
+                rotate_fn(p(self.imt[0]), mode="trilinear")
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rotate90.py b/tests/test_rotate90.py
index 4ab39d5cf6..9857b26fe8 100644
--- a/tests/test_rotate90.py
+++ b/tests/test_rotate90.py
@@ -14,45 +14,41 @@
 import numpy as np
 
 from monai.transforms import Rotate90
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 
 class TestRotate90(NumpyImageTestCase2D):
     def test_rotate90_default(self):
         rotate = Rotate90()
-        rotated = rotate(self.imt[0])
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 1, (0, 1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated, expected))
+        for p in TEST_NDARRAYS:
+            rotated = rotate(p(self.imt[0]))
+            expected = [np.rot90(channel, 1, (0, 1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8)
 
     def test_k(self):
         rotate = Rotate90(k=2)
-        rotated = rotate(self.imt[0])
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 2, (0, 1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated, expected))
+        for p in TEST_NDARRAYS:
+            rotated = rotate(p(self.imt[0]))
+            expected = [np.rot90(channel, 2, (0, 1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8)
 
     def test_spatial_axes(self):
         rotate = Rotate90(spatial_axes=(0, -1))
-        rotated = rotate(self.imt[0])
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 1, (0, -1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated, expected))
+        for p in TEST_NDARRAYS:
+            rotated = rotate(p(self.imt[0]))
+            expected = [np.rot90(channel, 1, (0, -1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8)
 
     def test_prob_k_spatial_axes(self):
         rotate = Rotate90(k=2, spatial_axes=(0, 1))
-        rotated = rotate(self.imt[0])
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 2, (0, 1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated, expected))
+        for p in TEST_NDARRAYS:
+            rotated = rotate(p(self.imt[0]))
+            expected = [np.rot90(channel, 2, (0, 1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated, p(expected), rtol=1.0e-5, atol=1.0e-8)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_rotate90d.py b/tests/test_rotate90d.py
index 3d71ead82a..a2a4a27521 100644
--- a/tests/test_rotate90d.py
+++ b/tests/test_rotate90d.py
@@ -14,49 +14,45 @@
 import numpy as np
 
 from monai.transforms import Rotate90d
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 
 class TestRotate90d(NumpyImageTestCase2D):
     def test_rotate90_default(self):
         key = "test"
         rotate = Rotate90d(keys=key)
-        rotated = rotate({key: self.imt[0]})
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 1, (0, 1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated[key], expected))
+        for p in TEST_NDARRAYS:
+            rotated = rotate({key: p(self.imt[0])})
+            expected = [np.rot90(channel, 1, (0, 1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated[key], p(expected))
 
     def test_k(self):
         key = None
         rotate = Rotate90d(keys=key, k=2)
-        rotated = rotate({key: self.imt[0]})
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 2, (0, 1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated[key], expected))
+        for p in TEST_NDARRAYS:
+            rotated = rotate({key: p(self.imt[0])})
+            expected = [np.rot90(channel, 2, (0, 1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated[key], p(expected))
 
     def test_spatial_axes(self):
         key = "test"
         rotate = Rotate90d(keys=key, spatial_axes=(0, 1))
-        rotated = rotate({key: self.imt[0]})
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 1, (0, 1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated[key], expected))
+        for p in TEST_NDARRAYS:
+            rotated = rotate({key: p(self.imt[0])})
+            expected = [np.rot90(channel, 1, (0, 1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated[key], p(expected))
 
     def test_prob_k_spatial_axes(self):
         key = "test"
         rotate = Rotate90d(keys=key, k=2, spatial_axes=(0, 1))
-        rotated = rotate({key: self.imt[0]})
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(np.rot90(channel, 2, (0, 1)))
-        expected = np.stack(expected)
-        self.assertTrue(np.allclose(rotated[key], expected))
+        for p in TEST_NDARRAYS:
+            rotated = rotate({key: p(self.imt[0])})
+            expected = [np.rot90(channel, 2, (0, 1)) for channel in self.imt[0]]
+            expected = np.stack(expected)
+            assert_allclose(rotated[key], p(expected))
 
     def test_no_key(self):
         key = "unknown"
diff --git a/tests/test_rotated.py b/tests/test_rotated.py
index 2ea421101b..cd27dd5406 100644
--- a/tests/test_rotated.py
+++ b/tests/test_rotated.py
@@ -10,36 +10,38 @@
 # limitations under the License.
 
 import unittest
+from typing import List, Tuple
 
 import numpy as np
 import scipy.ndimage
+import torch
 from parameterized import parameterized
 
 from monai.transforms import Rotated
-from tests.utils import NumpyImageTestCase2D, NumpyImageTestCase3D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, NumpyImageTestCase3D
 
-TEST_CASES_2D = [
-    (-np.pi / 6, False, "bilinear", "border", False),
-    (-np.pi / 4, True, "bilinear", "border", False),
-    (np.pi / 4.5, True, "nearest", "reflection", False),
-    (-np.pi, False, "nearest", "zeros", False),
-    (np.pi / 2, False, "bilinear", "zeros", True),
-]
+TEST_CASES_2D: List[Tuple] = []
+for p in TEST_NDARRAYS:
+    TEST_CASES_2D.append((p, -np.pi / 6, False, "bilinear", "border", False))
+    TEST_CASES_2D.append((p, -np.pi / 4, True, "bilinear", "border", False))
+    TEST_CASES_2D.append((p, np.pi / 4.5, True, "nearest", "reflection", False))
+    TEST_CASES_2D.append((p, -np.pi, False, "nearest", "zeros", False))
+    TEST_CASES_2D.append((p, np.pi / 2, False, "bilinear", "zeros", True))
 
-TEST_CASES_3D = [
-    (-np.pi / 6, False, "bilinear", "border", False),
-    (-np.pi / 4, True, "bilinear", "border", False),
-    (np.pi / 4.5, True, "nearest", "reflection", False),
-    (-np.pi, False, "nearest", "zeros", False),
-    (np.pi / 2, False, "bilinear", "zeros", True),
-]
+TEST_CASES_3D: List[Tuple] = []
+for p in TEST_NDARRAYS:
+    TEST_CASES_3D.append((p, -np.pi / 6, False, "bilinear", "border", False))
+    TEST_CASES_3D.append((p, -np.pi / 4, True, "bilinear", "border", False))
+    TEST_CASES_3D.append((p, np.pi / 4.5, True, "nearest", "reflection", False))
+    TEST_CASES_3D.append((p, -np.pi, False, "nearest", "zeros", False))
+    TEST_CASES_3D.append((p, np.pi / 2, False, "bilinear", "zeros", True))
 
 
 class TestRotated2D(NumpyImageTestCase2D):
     @parameterized.expand(TEST_CASES_2D)
-    def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corners):
+    def test_correct_results(self, im_type, angle, keep_size, mode, padding_mode, align_corners):
         rotate_fn = Rotated(("img", "seg"), angle, keep_size, (mode, "nearest"), padding_mode, align_corners)
-        rotated = rotate_fn({"img": self.imt[0], "seg": self.segn[0]})
+        rotated = rotate_fn({"img": im_type(self.imt[0]), "seg": im_type(self.segn[0])})
         if keep_size:
             np.testing.assert_allclose(self.imt[0].shape, rotated["img"].shape)
         _order = 0 if mode == "nearest" else 1
@@ -52,6 +54,8 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne
         expected = scipy.ndimage.rotate(
             self.imt[0, 0], -np.rad2deg(angle), (0, 1), not keep_size, order=_order, mode=_mode, prefilter=False
         )
+        for k, v in rotated.items():
+            rotated[k] = v.cpu() if isinstance(v, torch.Tensor) else v
         good = np.sum(np.isclose(expected, rotated["img"][0], atol=1e-3))
         self.assertLessEqual(np.abs(good - expected.size), 5, "diff at most 5 pixels")
 
@@ -64,9 +68,9 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne
 
 class TestRotated3D(NumpyImageTestCase3D):
     @parameterized.expand(TEST_CASES_3D)
-    def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corners):
+    def test_correct_results(self, im_type, angle, keep_size, mode, padding_mode, align_corners):
         rotate_fn = Rotated(("img", "seg"), [0, angle, 0], keep_size, (mode, "nearest"), padding_mode, align_corners)
-        rotated = rotate_fn({"img": self.imt[0], "seg": self.segn[0]})
+        rotated = rotate_fn({"img": im_type(self.imt[0]), "seg": im_type(self.segn[0])})
         if keep_size:
             np.testing.assert_allclose(self.imt[0].shape, rotated["img"].shape)
         _order = 0 if mode == "nearest" else 1
@@ -79,6 +83,8 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne
         expected = scipy.ndimage.rotate(
             self.imt[0, 0], np.rad2deg(angle), (0, 2), not keep_size, order=_order, mode=_mode, prefilter=False
         )
+        for k, v in rotated.items():
+            rotated[k] = v.cpu() if isinstance(v, torch.Tensor) else v
         good = np.sum(np.isclose(expected.astype(np.float32), rotated["img"][0], atol=1e-3))
         self.assertLessEqual(np.abs(good - expected.size), 5, "diff at most 5 voxels.")
 
@@ -91,9 +97,9 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne
 
 class TestRotated3DXY(NumpyImageTestCase3D):
     @parameterized.expand(TEST_CASES_3D)
-    def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corners):
+    def test_correct_results(self, im_type, angle, keep_size, mode, padding_mode, align_corners):
         rotate_fn = Rotated(("img", "seg"), [0, 0, angle], keep_size, (mode, "nearest"), padding_mode, align_corners)
-        rotated = rotate_fn({"img": self.imt[0], "seg": self.segn[0]})
+        rotated = rotate_fn({"img": im_type(self.imt[0]), "seg": im_type(self.segn[0])})
         if keep_size:
             np.testing.assert_allclose(self.imt[0].shape, rotated["img"].shape)
         _order = 0 if mode == "nearest" else 1
@@ -106,6 +112,8 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne
         expected = scipy.ndimage.rotate(
             self.imt[0, 0], -np.rad2deg(angle), (0, 1), not keep_size, order=_order, mode=_mode, prefilter=False
         )
+        for k, v in rotated.items():
+            rotated[k] = v.cpu() if isinstance(v, torch.Tensor) else v
         good = np.sum(np.isclose(expected, rotated["img"][0], atol=1e-3))
         self.assertLessEqual(np.abs(good - expected.size), 5, "diff at most 5 voxels")
 
diff --git a/tests/test_save_classificationd.py b/tests/test_save_classificationd.py
index 67dc0320a6..26ce3176e8 100644
--- a/tests/test_save_classificationd.py
+++ b/tests/test_save_classificationd.py
@@ -83,7 +83,7 @@ def test_saved_content(self):
             def _test_file(filename, count):
                 filepath = os.path.join(tempdir, filename)
                 self.assertTrue(os.path.exists(filepath))
-                with open(filepath, "r") as f:
+                with open(filepath) as f:
                     reader = csv.reader(f)
                     i = 0
                     for row in reader:
diff --git a/tests/test_savitzky_golay_smooth.py b/tests/test_savitzky_golay_smooth.py
index 45d0ea3e4d..0f398bc48f 100644
--- a/tests/test_savitzky_golay_smooth.py
+++ b/tests/test_savitzky_golay_smooth.py
@@ -25,14 +25,14 @@
     np.expand_dims(np.array([1.0]), 0),  # Input data: Single value
     np.expand_dims(np.array([1 / 3]), 0),  # Expected output: With a window length of 3 and polyorder 1
     # output should be equal to mean of 0, 1 and 0 = 1/3 (because input will be zero-padded and a linear fit performed)
-    1e-15,  # absolute tolerance
+    1e-5,  # absolute tolerance
 ]
 
 TEST_CASE_2D_AXIS_2 = [
     {"window_length": 3, "order": 1, "axis": 2},  # along axis 2 (second spatial dim)
     np.expand_dims(np.ones((2, 3)), 0),
     np.expand_dims(np.array([[2 / 3, 1.0, 2 / 3], [2 / 3, 1.0, 2 / 3]]), 0),
-    1e-15,  # absolute tolerance
+    1e-5,  # absolute tolerance
 ]
 
 # Replicated-padding trivial tests
@@ -42,7 +42,7 @@
     np.expand_dims(np.array([1.0]), 0),  # Input data: Single value
     np.expand_dims(np.array([1.0]), 0),  # Expected output: With a window length of 3 and polyorder 1
     # output will be equal to mean of [1, 1, 1] = 1 (input will be nearest-neighbour-padded and a linear fit performed)
-    1e-15,  # absolute tolerance
+    1e-5,  # absolute tolerance
 ]
 
 # Sine smoothing
@@ -62,16 +62,16 @@ class TestSavitzkyGolaySmooth(unittest.TestCase):
     @parameterized.expand([TEST_CASE_SINGLE_VALUE, TEST_CASE_2D_AXIS_2, TEST_CASE_SINE_SMOOTH])
     def test_value(self, arguments, image, expected_data, atol):
         for p in TEST_NDARRAYS:
-            result = SavitzkyGolaySmooth(**arguments)(p(image))
-            torch.testing.assert_allclose(result, p(expected_data.astype(np.float32)), rtol=1e-7, atol=atol)
+            result = SavitzkyGolaySmooth(**arguments)(p(image.astype(np.float32)))
+            torch.testing.assert_allclose(result, p(expected_data.astype(np.float32)), rtol=1e-4, atol=atol)
 
 
 class TestSavitzkyGolaySmoothREP(unittest.TestCase):
     @parameterized.expand([TEST_CASE_SINGLE_VALUE_REP])
     def test_value(self, arguments, image, expected_data, atol):
         for p in TEST_NDARRAYS:
-            result = SavitzkyGolaySmooth(**arguments)(p(image))
-            torch.testing.assert_allclose(result, p(expected_data.astype(np.float32)), rtol=1e-7, atol=atol)
+            result = SavitzkyGolaySmooth(**arguments)(p(image.astype(np.float32)))
+            torch.testing.assert_allclose(result, p(expected_data.astype(np.float32)), rtol=1e-4, atol=atol)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_scale_intensity.py b/tests/test_scale_intensity.py
index c2485af616..ddc2fb08e1 100644
--- a/tests/test_scale_intensity.py
+++ b/tests/test_scale_intensity.py
@@ -26,14 +26,26 @@ def test_range_scale(self):
             maxa = self.imt.max()
             norm = (self.imt - mina) / (maxa - mina)
             expected = p((norm * (2.0 - 1.0)) + 1.0)
-            assert_allclose(result, expected, rtol=1e-7, atol=0)
+            assert_allclose(result, expected, type_test=False, rtol=1e-7, atol=0)
 
     def test_factor_scale(self):
         for p in TEST_NDARRAYS:
             scaler = ScaleIntensity(minv=None, maxv=None, factor=0.1)
             result = scaler(p(self.imt))
             expected = p((self.imt * (1 + 0.1)).astype(np.float32))
-            assert_allclose(result, expected, rtol=1e-7, atol=0)
+            assert_allclose(result, p(expected), rtol=1e-7, atol=0)
+
+    def test_channel_wise(self):
+        for p in TEST_NDARRAYS:
+            scaler = ScaleIntensity(minv=1.0, maxv=2.0, channel_wise=True)
+            data = p(self.imt)
+            result = scaler(data)
+            mina = self.imt.min()
+            maxa = self.imt.max()
+            for i, c in enumerate(data):
+                norm = (c - mina) / (maxa - mina)
+                expected = p((norm * (2.0 - 1.0)) + 1.0)
+                assert_allclose(result[i], expected, type_test=False, rtol=1e-7, atol=0)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_scale_intensity_range.py b/tests/test_scale_intensity_range.py
index cba07d9157..d06bfd3596 100644
--- a/tests/test_scale_intensity_range.py
+++ b/tests/test_scale_intensity_range.py
@@ -11,19 +11,18 @@
 
 import unittest
 
-import numpy as np
-
 from monai.transforms import ScaleIntensityRange
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 
 class IntensityScaleIntensityRange(NumpyImageTestCase2D):
     def test_image_scale_intensity_range(self):
         scaler = ScaleIntensityRange(a_min=20, a_max=108, b_min=50, b_max=80)
-        scaled = scaler(self.imt)
-        expected = (self.imt - 20) / 88
-        expected = expected * 30 + 50
-        self.assertTrue(np.allclose(scaled, expected))
+        for p in TEST_NDARRAYS:
+            scaled = scaler(p(self.imt))
+            expected = (self.imt - 20) / 88
+            expected = expected * 30 + 50
+            assert_allclose(scaled, p(expected))
 
 
 if __name__ == "__main__":
diff --git a/tests/test_scale_intensity_range_percentiles.py b/tests/test_scale_intensity_range_percentiles.py
index 015162c8de..0024cb349d 100644
--- a/tests/test_scale_intensity_range_percentiles.py
+++ b/tests/test_scale_intensity_range_percentiles.py
@@ -14,7 +14,7 @@
 import numpy as np
 
 from monai.transforms.intensity.array import ScaleIntensityRangePercentiles
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 
 class TestScaleIntensityRangePercentiles(NumpyImageTestCase2D):
@@ -30,7 +30,9 @@ def test_scaling(self):
         expected = (img - a_min) / (a_max - a_min)
         expected = (expected * (b_max - b_min)) + b_min
         scaler = ScaleIntensityRangePercentiles(lower=lower, upper=upper, b_min=b_min, b_max=b_max)
-        self.assertTrue(np.allclose(expected, scaler(img)))
+        for p in TEST_NDARRAYS:
+            result = scaler(p(img))
+            assert_allclose(result, p(expected), rtol=1e-4)
 
     def test_relative_scaling(self):
         img = self.imt
@@ -47,7 +49,9 @@ def test_relative_scaling(self):
         expected_img = (img - expected_a_min) / (expected_a_max - expected_a_min)
         expected_img = (expected_img * (expected_b_max - expected_b_min)) + expected_b_min
 
-        self.assertTrue(np.allclose(expected_img, scaler(img)))
+        for p in TEST_NDARRAYS:
+            result = scaler(p(img))
+            assert_allclose(result, p(expected_img), rtol=1e-4)
 
     def test_invalid_instantiation(self):
         self.assertRaises(ValueError, ScaleIntensityRangePercentiles, lower=-10, upper=99, b_min=0, b_max=255)
diff --git a/tests/test_scale_intensity_ranged.py b/tests/test_scale_intensity_ranged.py
index a8cac414e8..dc064a7708 100644
--- a/tests/test_scale_intensity_ranged.py
+++ b/tests/test_scale_intensity_ranged.py
@@ -11,20 +11,19 @@
 
 import unittest
 
-import numpy as np
-
 from monai.transforms import ScaleIntensityRanged
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 
 class IntensityScaleIntensityRanged(NumpyImageTestCase2D):
     def test_image_scale_intensity_ranged(self):
         key = "img"
         scaler = ScaleIntensityRanged(keys=key, a_min=20, a_max=108, b_min=50, b_max=80)
-        scaled = scaler({key: self.imt})
-        expected = (self.imt - 20) / 88
-        expected = expected * 30 + 50
-        self.assertTrue(np.allclose(scaled[key], expected))
+        for p in TEST_NDARRAYS:
+            scaled = scaler({key: p(self.imt)})
+            expected = (self.imt - 20) / 88
+            expected = expected * 30 + 50
+            assert_allclose(scaled[key], p(expected))
 
 
 if __name__ == "__main__":
diff --git a/tests/test_scale_intensityd.py b/tests/test_scale_intensityd.py
index 6e13dbc272..93449b15e2 100644
--- a/tests/test_scale_intensityd.py
+++ b/tests/test_scale_intensityd.py
@@ -19,23 +19,36 @@
 
 class TestScaleIntensityd(NumpyImageTestCase2D):
     def test_range_scale(self):
+        key = "img"
         for p in TEST_NDARRAYS:
-            key = "img"
             scaler = ScaleIntensityd(keys=[key], minv=1.0, maxv=2.0)
             result = scaler({key: p(self.imt)})
             mina = np.min(self.imt)
             maxa = np.max(self.imt)
             norm = (self.imt - mina) / (maxa - mina)
             expected = (norm * (2.0 - 1.0)) + 1.0
-            assert_allclose(result[key], expected)
+            assert_allclose(result[key], p(expected))
 
     def test_factor_scale(self):
+        key = "img"
         for p in TEST_NDARRAYS:
-            key = "img"
             scaler = ScaleIntensityd(keys=[key], minv=None, maxv=None, factor=0.1)
             result = scaler({key: p(self.imt)})
             expected = (self.imt * (1 + 0.1)).astype(np.float32)
-            assert_allclose(result[key], expected)
+            assert_allclose(result[key], p(expected))
+
+    def test_channel_wise(self):
+        key = "img"
+        for p in TEST_NDARRAYS:
+            scaler = ScaleIntensityd(keys=[key], minv=1.0, maxv=2.0, channel_wise=True)
+            data = p(self.imt)
+            result = scaler({key: data})
+            mina = self.imt.min()
+            maxa = self.imt.max()
+            for i, c in enumerate(data):
+                norm = (c - mina) / (maxa - mina)
+                expected = p((norm * (2.0 - 1.0)) + 1.0)
+                assert_allclose(result[key][i], expected, type_test=False, rtol=1e-7, atol=0)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_seg_loss_integration.py b/tests/test_seg_loss_integration.py
index d2f991f160..98d840afea 100644
--- a/tests/test_seg_loss_integration.py
+++ b/tests/test_seg_loss_integration.py
@@ -91,7 +91,7 @@ def test_convergence(self, loss_type, loss_args, forward_args):
         # define a one layer model
         class OnelayerNet(nn.Module):
             def __init__(self):
-                super(OnelayerNet, self).__init__()
+                super().__init__()
                 self.layer_1 = nn.Linear(num_voxels, 200)
                 self.acti = nn.ReLU()
                 self.layer_2 = nn.Linear(200, num_voxels * num_classes)
diff --git a/tests/test_shift_intensityd.py b/tests/test_shift_intensityd.py
index 0396857781..66aad23b1e 100644
--- a/tests/test_shift_intensityd.py
+++ b/tests/test_shift_intensityd.py
@@ -24,7 +24,7 @@ def test_value(self):
             shifter = ShiftIntensityd(keys=[key], offset=1.0)
             result = shifter({key: p(self.imt)})
             expected = self.imt + 1.0
-            assert_allclose(result[key], expected)
+            assert_allclose(result[key], p(expected))
 
     def test_factor(self):
         key = "img"
diff --git a/tests/test_smartcache_patch_wsi_dataset.py b/tests/test_smartcache_patch_wsi_dataset.py
index c484e5fc69..a2605ec525 100644
--- a/tests/test_smartcache_patch_wsi_dataset.py
+++ b/tests/test_smartcache_patch_wsi_dataset.py
@@ -21,9 +21,10 @@
 from monai.apps.utils import download_url
 from monai.utils import optional_import
 
-_, has_cim = optional_import("cucim")
+_cucim, has_cim = optional_import("cucim")
+has_cim = has_cim and hasattr(_cucim, "CuImage")
 
-FILE_URL = "http://openslide.cs.cmu.edu/download/openslide-testdata/Generic-TIFF/CMU-1.tiff"
+FILE_URL = "https://drive.google.com/uc?id=1sGTKZlJBIz53pfqTxoTqiIQzIoEzHLAe"
 FILE_PATH = os.path.join(os.path.dirname(__file__), "testing_data", "temp_" + os.path.basename(FILE_URL))
 
 TEST_CASE_0 = [
diff --git a/tests/test_spacing.py b/tests/test_spacing.py
index 6be6730c5a..cd362bccea 100644
--- a/tests/test_spacing.py
+++ b/tests/test_spacing.py
@@ -12,155 +12,204 @@
 import unittest
 
 import numpy as np
+import torch
 from parameterized import parameterized
 
 from monai.transforms import Spacing
 from monai.utils import ensure_tuple, fall_back_tuple
+from tests.utils import TEST_NDARRAYS
 
-TEST_CASES = [
-    [
-        {"pixdim": (1.0, 1.5), "padding_mode": "zeros", "dtype": float},
-        np.arange(4).reshape((1, 2, 2)) + 1.0,  # data
-        {"affine": np.eye(4)},
-        np.array([[[1.0, 1.0], [3.0, 2.0]]]),
-    ],
-    [
-        {"pixdim": 1.0, "padding_mode": "zeros", "dtype": float},
-        np.ones((1, 2, 1, 2)),  # data
-        {"affine": np.eye(4)},
-        np.array([[[[1.0, 1.0]], [[1.0, 1.0]]]]),
-    ],
-    [
-        {"pixdim": (1.0, 1.0, 1.0), "padding_mode": "zeros", "dtype": float},
-        np.ones((1, 2, 1, 2)),  # data
-        {"affine": np.eye(4)},
-        np.array([[[[1.0, 1.0]], [[1.0, 1.0]]]]),
-    ],
-    [
-        {"pixdim": (1.0, 0.2, 1.5), "diagonal": False, "padding_mode": "zeros", "align_corners": True},
-        np.ones((1, 2, 1, 2)),  # data
-        {"affine": np.array([[2, 1, 0, 4], [-1, -3, 0, 5], [0, 0, 2.0, 5], [0, 0, 0, 1]])},
-        np.array([[[[0.95527864, 0.95527864]], [[1.0, 1.0]], [[1.0, 1.0]]]]),
-    ],
-    [
-        {"pixdim": (3.0, 1.0), "padding_mode": "zeros"},
-        np.arange(24).reshape((2, 3, 4)),  # data
-        {"affine": np.diag([-3.0, 0.2, 1.5, 1])},
-        np.array([[[0, 0], [4, 0], [8, 0]], [[12, 0], [16, 0], [20, 0]]]),
-    ],
-    [
-        {"pixdim": (3.0, 1.0), "padding_mode": "zeros"},
-        np.arange(24).reshape((2, 3, 4)),  # data
-        {},
-        np.array([[[0, 1, 2, 3], [0, 0, 0, 0]], [[12, 13, 14, 15], [0, 0, 0, 0]]]),
-    ],
-    [
-        {"pixdim": (1.0, 1.0)},
-        np.arange(24).reshape((2, 3, 4)),  # data
-        {},
-        np.array(
-            [[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]], [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]]
-        ),
-    ],
-    [
-        {"pixdim": (4.0, 5.0, 6.0)},
-        np.arange(24).reshape((1, 2, 3, 4)),  # data
-        {"affine": np.array([[-4, 0, 0, 4], [0, 5, 0, -5], [0, 0, 6, -6], [0, 0, 0, 1]])},
-        np.arange(24).reshape((1, 2, 3, 4)),  # data
-    ],
-    [
-        {"pixdim": (4.0, 5.0, 6.0), "diagonal": True},
-        np.arange(24).reshape((1, 2, 3, 4)),  # data
-        {"affine": np.array([[-4, 0, 0, 4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]])},
-        np.array(
-            [[[[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]], [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]]]
-        ),
-    ],
-    [
-        {"pixdim": (4.0, 5.0, 6.0), "padding_mode": "border", "diagonal": True},
-        np.arange(24).reshape((1, 2, 3, 4)),  # data
-        {"affine": np.array([[-4, 0, 0, -4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]])},
-        np.array(
-            [[[[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]], [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]]]
-        ),
-    ],
-    [
-        {"pixdim": (4.0, 5.0, 6.0), "padding_mode": "border", "diagonal": True},
-        np.arange(24).reshape((1, 2, 3, 4)),  # data
-        {"affine": np.array([[-4, 0, 0, -4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "nearest"},
-        np.array(
-            [[[[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]], [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]]]
-        ),
-    ],
-    [
-        {"pixdim": (1.9, 4.0), "padding_mode": "zeros", "diagonal": True},
-        np.arange(24).reshape((1, 4, 6)),  # data
-        {"affine": np.array([[-4, 0, 0, -4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "nearest"},
-        np.array(
-            [
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
+        [
+            p,
+            {"pixdim": (1.0, 1.5), "padding_mode": "zeros", "dtype": float},
+            np.arange(4).reshape((1, 2, 2)) + 1.0,  # data
+            {"affine": np.eye(4)},
+            np.array([[[1.0, 1.0], [3.0, 2.0]]]),
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"pixdim": 1.0, "padding_mode": "zeros", "dtype": float},
+            np.ones((1, 2, 1, 2)),  # data
+            {"affine": np.eye(4)},
+            np.array([[[[1.0, 1.0]], [[1.0, 1.0]]]]),
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"pixdim": (1.0, 1.0, 1.0), "padding_mode": "zeros", "dtype": float},
+            np.ones((1, 2, 1, 2)),  # data
+            {"affine": np.eye(4)},
+            np.array([[[[1.0, 1.0]], [[1.0, 1.0]]]]),
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"pixdim": (1.0, 0.2, 1.5), "diagonal": False, "padding_mode": "zeros", "align_corners": True},
+            np.ones((1, 2, 1, 2)),  # data
+            {"affine": np.array([[2, 1, 0, 4], [-1, -3, 0, 5], [0, 0, 2.0, 5], [0, 0, 0, 1]])},
+            np.array([[[[0.95527864, 0.95527864]], [[1.0, 1.0]], [[1.0, 1.0]]]]),
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"pixdim": (3.0, 1.0), "padding_mode": "zeros"},
+            np.arange(24).reshape((2, 3, 4)),  # data
+            {"affine": np.diag([-3.0, 0.2, 1.5, 1])},
+            np.array([[[0, 0], [4, 0], [8, 0]], [[12, 0], [16, 0], [20, 0]]]),
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"pixdim": (3.0, 1.0), "padding_mode": "zeros"},
+            np.arange(24).reshape((2, 3, 4)),  # data
+            {},
+            np.array([[[0, 1, 2, 3], [0, 0, 0, 0]], [[12, 13, 14, 15], [0, 0, 0, 0]]]),
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"pixdim": (1.0, 1.0)},
+            np.arange(24).reshape((2, 3, 4)),  # data
+            {},
+            np.array(
+                [[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]], [[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]]]
+            ),
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"pixdim": (4.0, 5.0, 6.0)},
+            np.arange(24).reshape((1, 2, 3, 4)),  # data
+            {"affine": np.array([[-4, 0, 0, 4], [0, 5, 0, -5], [0, 0, 6, -6], [0, 0, 0, 1]])},
+            np.arange(24).reshape((1, 2, 3, 4)),  # data
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"pixdim": (4.0, 5.0, 6.0), "diagonal": True},
+            np.arange(24).reshape((1, 2, 3, 4)),  # data
+            {"affine": np.array([[-4, 0, 0, 4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]])},
+            np.array(
+                [[[[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]], [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]]]
+            ),
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"pixdim": (4.0, 5.0, 6.0), "padding_mode": "border", "diagonal": True},
+            np.arange(24).reshape((1, 2, 3, 4)),  # data
+            {"affine": np.array([[-4, 0, 0, -4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]])},
+            np.array(
+                [[[[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]], [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]]]
+            ),
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"pixdim": (4.0, 5.0, 6.0), "padding_mode": "border", "diagonal": True},
+            np.arange(24).reshape((1, 2, 3, 4)),  # data
+            {"affine": np.array([[-4, 0, 0, -4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "nearest"},
+            np.array(
+                [[[[12, 13, 14, 15], [16, 17, 18, 19], [20, 21, 22, 23]], [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]]]]
+            ),
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"pixdim": (1.9, 4.0), "padding_mode": "zeros", "diagonal": True},
+            np.arange(24).reshape((1, 4, 6)),  # data
+            {"affine": np.array([[-4, 0, 0, -4], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "nearest"},
+            np.array(
                 [
-                    [18.0, 19.0, 20.0, 20.0, 21.0, 22.0, 23.0],
-                    [18.0, 19.0, 20.0, 20.0, 21.0, 22.0, 23.0],
-                    [12.0, 13.0, 14.0, 14.0, 15.0, 16.0, 17.0],
-                    [12.0, 13.0, 14.0, 14.0, 15.0, 16.0, 17.0],
-                    [6.0, 7.0, 8.0, 8.0, 9.0, 10.0, 11.0],
-                    [6.0, 7.0, 8.0, 8.0, 9.0, 10.0, 11.0],
-                    [0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0],
+                    [
+                        [18.0, 19.0, 20.0, 20.0, 21.0, 22.0, 23.0],
+                        [18.0, 19.0, 20.0, 20.0, 21.0, 22.0, 23.0],
+                        [12.0, 13.0, 14.0, 14.0, 15.0, 16.0, 17.0],
+                        [12.0, 13.0, 14.0, 14.0, 15.0, 16.0, 17.0],
+                        [6.0, 7.0, 8.0, 8.0, 9.0, 10.0, 11.0],
+                        [6.0, 7.0, 8.0, 8.0, 9.0, 10.0, 11.0],
+                        [0.0, 1.0, 2.0, 2.0, 3.0, 4.0, 5.0],
+                    ]
                 ]
-            ]
-        ),
-    ],
-    [
-        {"pixdim": (5.0, 3.0), "padding_mode": "border", "diagonal": True, "dtype": np.float32},
-        np.arange(24).reshape((1, 4, 6)),  # data
-        {"affine": np.array([[-4, 0, 0, 0], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "bilinear"},
-        np.array(
-            [
+            ),
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"pixdim": (5.0, 3.0), "padding_mode": "border", "diagonal": True, "dtype": np.float32},
+            np.arange(24).reshape((1, 4, 6)),  # data
+            {"affine": np.array([[-4, 0, 0, 0], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "bilinear"},
+            np.array(
                 [
-                    [18.0, 18.6, 19.2, 19.8, 20.400002, 21.0, 21.6, 22.2, 22.8],
-                    [10.5, 11.1, 11.700001, 12.299999, 12.900001, 13.5, 14.1, 14.700001, 15.3],
-                    [3.0, 3.6000001, 4.2000003, 4.8, 5.4000006, 6.0, 6.6000004, 7.200001, 7.8],
+                    [
+                        [18.0, 18.6, 19.2, 19.8, 20.400002, 21.0, 21.6, 22.2, 22.8],
+                        [10.5, 11.1, 11.700001, 12.299999, 12.900001, 13.5, 14.1, 14.700001, 15.3],
+                        [3.0, 3.6000001, 4.2000003, 4.8, 5.4000006, 6.0, 6.6000004, 7.200001, 7.8],
+                    ]
                 ]
-            ]
-        ),
-    ],
-    [
-        {"pixdim": (5.0, 3.0), "padding_mode": "zeros", "diagonal": True, "dtype": np.float32},
-        np.arange(24).reshape((1, 4, 6)),  # data
-        {"affine": np.array([[-4, 0, 0, 0], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "bilinear"},
-        np.array(
-            [
+            ),
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"pixdim": (5.0, 3.0), "padding_mode": "zeros", "diagonal": True, "dtype": np.float32},
+            np.arange(24).reshape((1, 4, 6)),  # data
+            {"affine": np.array([[-4, 0, 0, 0], [0, 5, 0, 0], [0, 0, 6, 0], [0, 0, 0, 1]]), "mode": "bilinear"},
+            np.array(
                 [
-                    [18.0000, 18.6000, 19.2000, 19.8000, 20.4000, 21.0000, 21.6000, 22.2000, 22.8000],
-                    [10.5000, 11.1000, 11.7000, 12.3000, 12.9000, 13.5000, 14.1000, 14.7000, 15.3000],
-                    [3.0000, 3.6000, 4.2000, 4.8000, 5.4000, 6.0000, 6.6000, 7.2000, 7.8000],
+                    [
+                        [18.0000, 18.6000, 19.2000, 19.8000, 20.4000, 21.0000, 21.6000, 22.2000, 22.8000],
+                        [10.5000, 11.1000, 11.7000, 12.3000, 12.9000, 13.5000, 14.1000, 14.7000, 15.3000],
+                        [3.0000, 3.6000, 4.2000, 4.8000, 5.4000, 6.0000, 6.6000, 7.2000, 7.8000],
+                    ]
                 ]
-            ]
-        ),
-    ],
-    [
-        {"pixdim": [-1, -1, 0.5], "padding_mode": "zeros", "dtype": float},
-        np.ones((1, 2, 1, 2)),  # data
-        {"affine": np.eye(4)},
-        np.array([[[[1.0, 1.0, 1.0]], [[1.0, 1.0, 1.0]]]]),
-    ],
-]
+            ),
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"pixdim": [-1, -1, 0.5], "padding_mode": "zeros", "dtype": float},
+            np.ones((1, 2, 1, 2)),  # data
+            {"affine": np.eye(4)},
+            np.array([[[[1.0, 1.0, 1.0]], [[1.0, 1.0, 1.0]]]]),
+        ]
+    )
 
 
 class TestSpacingCase(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
-    def test_spacing(self, init_param, img, data_param, expected_output):
-        res = Spacing(**init_param)(img, **data_param)
-        if not isinstance(res, tuple):
-            np.testing.assert_allclose(res, expected_output, atol=1e-6)
-            return
-        np.testing.assert_allclose(res[0], expected_output, atol=1e-6)
-        sr = len(res[0].shape) - 1
+    @parameterized.expand(TESTS)
+    def test_spacing(self, in_type, init_param, img, data_param, expected_output):
+        _img = in_type(img)
+        output_data, _, new_affine = Spacing(**init_param)(_img, **data_param)
+        if isinstance(_img, torch.Tensor):
+            self.assertEqual(_img.device, output_data.device)
+            output_data = output_data.cpu()
+
+        np.testing.assert_allclose(output_data, expected_output, atol=1e-3, rtol=1e-3)
+        sr = len(output_data.shape) - 1
         if isinstance(init_param["pixdim"], float):
             init_param["pixdim"] = [init_param["pixdim"]] * sr
         init_pixdim = ensure_tuple(init_param["pixdim"])
         init_pixdim = init_param["pixdim"][:sr]
-        norm = np.sqrt(np.sum(np.square(res[2]), axis=0))[:sr]
+        norm = np.sqrt(np.sum(np.square(new_affine), axis=0))[:sr]
         np.testing.assert_allclose(fall_back_tuple(init_pixdim, norm), norm)
 
 
diff --git a/tests/test_spacingd.py b/tests/test_spacingd.py
index 61a4a4c38b..fd1ee7fd54 100644
--- a/tests/test_spacingd.py
+++ b/tests/test_spacingd.py
@@ -10,82 +10,95 @@
 # limitations under the License.
 
 import unittest
+from typing import List, Tuple
 
 import numpy as np
+import torch
+from parameterized import parameterized
 
 from monai.transforms import Spacingd
+from tests.utils import TEST_NDARRAYS
 
-
-class TestSpacingDCase(unittest.TestCase):
-    def test_spacingd_3d(self):
-        data = {"image": np.ones((2, 10, 15, 20)), "image_meta_dict": {"affine": np.eye(4)}}
-        spacing = Spacingd(keys="image", pixdim=(1, 2, 1.4))
-        res = spacing(data)
-        self.assertEqual(("image", "image_meta_dict", "image_transforms"), tuple(sorted(res)))
-        np.testing.assert_allclose(res["image"].shape, (2, 10, 8, 15))
-        np.testing.assert_allclose(res["image_meta_dict"]["affine"], np.diag([1, 2, 1.4, 1.0]))
-
-    def test_spacingd_2d(self):
-        data = {"image": np.ones((2, 10, 20)), "image_meta_dict": {"affine": np.eye(3)}}
-        spacing = Spacingd(keys="image", pixdim=(1, 2))
-        res = spacing(data)
-        self.assertEqual(("image", "image_meta_dict", "image_transforms"), tuple(sorted(res)))
-        np.testing.assert_allclose(res["image"].shape, (2, 10, 10))
-        np.testing.assert_allclose(res["image_meta_dict"]["affine"], np.diag((1, 2, 1)))
-
-    def test_spacingd_2d_no_metadata(self):
-        data = {"image": np.ones((2, 10, 20))}
-        spacing = Spacingd(keys="image", pixdim=(1, 2))
-        res = spacing(data)
-        self.assertEqual(("image", "image_meta_dict", "image_transforms"), tuple(sorted(res)))
-        np.testing.assert_allclose(res["image"].shape, (2, 10, 10))
-        np.testing.assert_allclose(res["image_meta_dict"]["affine"], np.diag((1, 2, 1)))
-
-    def test_interp_all(self):
-        data = {
-            "image": np.arange(20).reshape((2, 1, 10)),
-            "seg": np.ones((2, 1, 10)),
-            "image_meta_dict": {"affine": np.eye(4)},
-            "seg_meta_dict": {"affine": np.eye(4)},
-        }
-        spacing = Spacingd(
-            keys=("image", "seg"),
-            mode="nearest",
-            pixdim=(
-                1,
-                0.2,
-            ),
+TESTS: List[Tuple] = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
+        (
+            "spacing 3d",
+            {"image": p(np.ones((2, 10, 15, 20))), "image_meta_dict": {"affine": p(np.eye(4))}},
+            dict(keys="image", pixdim=(1, 2, 1.4)),
+            ("image", "image_meta_dict", "image_transforms"),
+            (2, 10, 8, 15),
+            np.diag([1, 2, 1.4, 1.0]),
         )
-        res = spacing(data)
-        self.assertEqual(
-            ("image", "image_meta_dict", "image_transforms", "seg", "seg_meta_dict", "seg_transforms"),
-            tuple(sorted(res)),
+    )
+    TESTS.append(
+        (
+            "spacing 2d",
+            {"image": np.ones((2, 10, 20)), "image_meta_dict": {"affine": np.eye(3)}},
+            dict(keys="image", pixdim=(1, 2)),
+            ("image", "image_meta_dict", "image_transforms"),
+            (2, 10, 10),
+            np.diag((1, 2, 1)),
         )
-        np.testing.assert_allclose(res["image"].shape, (2, 1, 46))
-        np.testing.assert_allclose(res["image_meta_dict"]["affine"], np.diag((1, 0.2, 1, 1)))
-
-    def test_interp_sep(self):
-        data = {
-            "image": np.ones((2, 1, 10)),
-            "seg": np.ones((2, 1, 10)),
-            "image_meta_dict": {"affine": np.eye(4)},
-            "seg_meta_dict": {"affine": np.eye(4)},
-        }
-        spacing = Spacingd(
-            keys=("image", "seg"),
-            mode=("bilinear", "nearest"),
-            pixdim=(
-                1,
-                0.2,
+    )
+    TESTS.append(
+        (
+            "spacing 2d no metadata",
+            {"image": np.ones((2, 10, 20))},
+            dict(keys="image", pixdim=(1, 2)),
+            ("image", "image_meta_dict", "image_transforms"),
+            (2, 10, 10),
+            np.diag((1, 2, 1)),
+        )
+    )
+    TESTS.append(
+        (
+            "interp all",
+            {
+                "image": np.arange(20).reshape((2, 1, 10)),
+                "seg": np.ones((2, 1, 10)),
+                "image_meta_dict": {"affine": np.eye(4)},
+                "seg_meta_dict": {"affine": np.eye(4)},
+            },
+            dict(
+                keys=("image", "seg"),
+                mode="nearest",
+                pixdim=(
+                    1,
+                    0.2,
+                ),
             ),
+            ("image", "image_meta_dict", "image_transforms", "seg", "seg_meta_dict", "seg_transforms"),
+            (2, 1, 46),
+            np.diag((1, 0.2, 1, 1)),
         )
-        res = spacing(data)
-        self.assertEqual(
+    )
+    TESTS.append(
+        (
+            "interp sep",
+            {
+                "image": np.ones((2, 1, 10)),
+                "seg": np.ones((2, 1, 10)),
+                "image_meta_dict": {"affine": np.eye(4)},
+                "seg_meta_dict": {"affine": np.eye(4)},
+            },
+            dict(keys=("image", "seg"), mode=("bilinear", "nearest"), pixdim=(1, 0.2)),
             ("image", "image_meta_dict", "image_transforms", "seg", "seg_meta_dict", "seg_transforms"),
-            tuple(sorted(res)),
+            (2, 1, 46),
+            np.diag((1, 0.2, 1, 1)),
         )
-        np.testing.assert_allclose(res["image"].shape, (2, 1, 46))
-        np.testing.assert_allclose(res["image_meta_dict"]["affine"], np.diag((1, 0.2, 1, 1)))
+    )
+
+
+class TestSpacingDCase(unittest.TestCase):
+    @parameterized.expand(TESTS)
+    def test_spacingd(self, _, data, kw_args, expected_keys, expected_shape, expected_affine):
+        res = Spacingd(**kw_args)(data)
+        if isinstance(data["image"], torch.Tensor):
+            self.assertEqual(data["image"].device, res["image"].device)
+        self.assertEqual(expected_keys, tuple(sorted(res)))
+        np.testing.assert_allclose(res["image"].shape, expected_shape)
+        np.testing.assert_allclose(res["image_meta_dict"]["affine"], expected_affine)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_spatial_crop.py b/tests/test_spatial_crop.py
index c76915f0a3..c18dd86e46 100644
--- a/tests/test_spatial_crop.py
+++ b/tests/test_spatial_crop.py
@@ -16,8 +16,9 @@
 from parameterized import parameterized
 
 from monai.transforms import SpatialCrop
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASES = [
+TESTS = [
     [
         {"roi_center": [1, 1, 1], "roi_size": [2, 2, 2]},
         (3, 3, 3, 3),
@@ -53,17 +54,24 @@
 
 
 class TestSpatialCrop(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_shape(self, input_param, input_shape, expected_shape):
         input_data = np.random.randint(0, 2, size=input_shape)
-        result = SpatialCrop(**input_param)(input_data)
-        self.assertTupleEqual(result.shape, expected_shape)
-
-    @parameterized.expand(TEST_CASES)
-    def test_tensor_shape(self, input_param, input_shape, expected_shape):
-        input_data = torch.randint(0, 2, size=input_shape, device="cuda" if torch.cuda.is_available() else "cpu")
-        result = SpatialCrop(**input_param)(input_data)
-        self.assertTupleEqual(result.shape, expected_shape)
+        results = []
+        for p in TEST_NDARRAYS:
+            for q in TEST_NDARRAYS + (None,):
+                input_param_mod = {
+                    k: q(v) if k != "roi_slices" and q is not None else v for k, v in input_param.items()
+                }
+                im = p(input_data)
+                result = SpatialCrop(**input_param_mod)(im)
+                self.assertEqual(type(im), type(result))
+                if isinstance(result, torch.Tensor):
+                    self.assertEqual(result.device, im.device)
+                self.assertTupleEqual(result.shape, expected_shape)
+                results.append(result)
+                if len(results) > 1:
+                    assert_allclose(results[0], results[-1], type_test=False)
 
     @parameterized.expand(TEST_ERRORS)
     def test_error(self, input_param):
diff --git a/tests/test_spatial_cropd.py b/tests/test_spatial_cropd.py
index 797c25d34b..17743124e0 100644
--- a/tests/test_spatial_cropd.py
+++ b/tests/test_spatial_cropd.py
@@ -15,38 +15,49 @@
 from parameterized import parameterized
 
 from monai.transforms import SpatialCropd
+from tests.utils import TEST_NDARRAYS
 
-TEST_CASES = [
-    [
-        {"keys": ["img"], "roi_center": [1, 1, 1], "roi_size": [2, 2, 2]},
-        {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])},
-        (3, 2, 2, 2),
-    ],
-    [
-        {"keys": ["img"], "roi_start": [0, 0, 0], "roi_end": [2, 2, 2]},
-        {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])},
-        (3, 2, 2, 2),
-    ],
-    [
-        {"keys": ["img"], "roi_start": [0, 0], "roi_end": [2, 2]},
-        {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])},
-        (3, 2, 2, 3),
-    ],
-    [
-        {"keys": ["img"], "roi_start": [0, 0, 0, 0, 0], "roi_end": [2, 2, 2, 2, 2]},
-        {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])},
-        (3, 2, 2, 2),
-    ],
-    [
-        {"keys": ["img"], "roi_slices": [slice(s, e) for s, e in zip([-1, -2, 0], [None, None, 2])]},
-        {"img": np.random.randint(0, 2, size=[3, 3, 3, 3])},
-        (3, 1, 2, 2),
-    ],
-]
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
+        [
+            {"keys": ["img"], "roi_center": [1, 1, 1], "roi_size": [2, 2, 2]},
+            {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))},
+            (3, 2, 2, 2),
+        ]
+    )
+    TESTS.append(
+        [
+            {"keys": ["img"], "roi_start": [0, 0, 0], "roi_end": [2, 2, 2]},
+            {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))},
+            (3, 2, 2, 2),
+        ]
+    )
+    TESTS.append(
+        [
+            {"keys": ["img"], "roi_start": [0, 0], "roi_end": [2, 2]},
+            {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))},
+            (3, 2, 2, 3),
+        ]
+    )
+    TESTS.append(
+        [
+            {"keys": ["img"], "roi_start": [0, 0, 0, 0, 0], "roi_end": [2, 2, 2, 2, 2]},
+            {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))},
+            (3, 2, 2, 2),
+        ]
+    )
+    TESTS.append(
+        [
+            {"keys": ["img"], "roi_slices": [slice(s, e) for s, e in zip([-1, -2, 0], [None, None, 2])]},
+            {"img": p(np.random.randint(0, 2, size=[3, 3, 3, 3]))},
+            (3, 1, 2, 2),
+        ]
+    )
 
 
 class TestSpatialCropd(unittest.TestCase):
-    @parameterized.expand(TEST_CASES)
+    @parameterized.expand(TESTS)
     def test_shape(self, input_param, input_data, expected_shape):
         result = SpatialCropd(**input_param)(input_data)
         self.assertTupleEqual(result["img"].shape, expected_shape)
diff --git a/tests/test_spatial_pad.py b/tests/test_spatial_pad.py
index 86d010bbad..3d237c6681 100644
--- a/tests/test_spatial_pad.py
+++ b/tests/test_spatial_pad.py
@@ -17,34 +17,41 @@
 from parameterized import parameterized
 
 from monai.transforms import SpatialPad
-from monai.utils.enums import NumpyPadMode
+from monai.utils.enums import NumpyPadMode, PytorchPadMode
 from monai.utils.misc import set_determinism
 from tests.utils import TEST_NDARRAYS
 
 TESTS = []
 
-# Numpy modes
-MODES: List = [
+MODES = []
+
+# Test modes
+NP_MODES: List = [
     "constant",
     "edge",
-    "linear_ramp",
-    "maximum",
-    "mean",
-    "median",
-    "minimum",
-    "reflect",
-    "symmetric",
+    # `reflect` mode is not supported in some PyTorch versions, skip the test
+    # "reflect",
     "wrap",
-    "empty",
 ]
-MODES += [NumpyPadMode(i) for i in MODES]
+MODES += NP_MODES
+MODES += [NumpyPadMode(i) for i in NP_MODES]
+
+PT_MODES: list = [
+    "constant",
+    "replicate",
+    "circular",
+    # `reflect` mode is not supported in some PyTorch versions, skip the test
+    # "reflect",
+]
+MODES += PT_MODES
+MODES += [PytorchPadMode(i) for i in PT_MODES]
 
 for mode in MODES:
     TESTS.append(
         [
-            {"spatial_size": [50, 50], "method": "end", "mode": mode},
-            (1, 2, 2),
-            (1, 50, 50),
+            {"spatial_size": [3, 4], "method": "end", "mode": mode},
+            (1, 2, 3),
+            (1, 3, 4),
         ]
     )
 
@@ -86,14 +93,19 @@ def test_pad_shape(self, input_param, input_shape, expected_shape):
                     torch.testing.assert_allclose(results[0], results[-1], atol=0, rtol=1e-5)
 
     def test_pad_kwargs(self):
-        padder = SpatialPad(
-            spatial_size=[15, 8], method="end", mode="constant", constant_values=((0, 0), (1, 1), (2, 2))
-        )
         for p in TEST_NDARRAYS:
-            result = padder(p(np.zeros((3, 8, 4))))
-            if isinstance(result, torch.Tensor):
-                result = result.cpu().numpy()
-            torch.testing.assert_allclose(result[:, 8:, :4], np.ones((3, 7, 4)), rtol=1e-7, atol=0)
+            input_data = p(np.zeros((3, 8, 4)))
+            if isinstance(input_data, torch.Tensor):
+                result = (
+                    SpatialPad(spatial_size=[15, 8], method="end", mode="constant", value=2)(img=input_data)
+                    .cpu()
+                    .numpy()
+                )
+            else:
+                result = SpatialPad(
+                    spatial_size=[15, 8], method="end", mode="constant", constant_values=((0, 0), (1, 1), (2, 2))
+                )(img=input_data)
+                torch.testing.assert_allclose(result[:, 8:, :4], np.ones((3, 7, 4)), rtol=1e-7, atol=0)
             torch.testing.assert_allclose(result[:, :, 4:], np.ones((3, 15, 4)) + 1, rtol=1e-7, atol=0)
 
 
diff --git a/tests/test_threadcontainer.py b/tests/test_threadcontainer.py
index 543dab4d0c..5613b1babd 100644
--- a/tests/test_threadcontainer.py
+++ b/tests/test_threadcontainer.py
@@ -79,7 +79,7 @@ def test_plot(self):
         # a third non-image key is added to test that this is correctly ignored when plotting
         data = {CommonKeys.IMAGE: img, CommonKeys.LABEL: img, "Not Image Data": ["This isn't an image"]}
 
-        loader = DataLoader([data] * 10)
+        loader = DataLoader([data] * 20, batch_size=2)
 
         trainer = SupervisedTrainer(
             device=torch.device("cpu"),
diff --git a/tests/test_threshold_intensity.py b/tests/test_threshold_intensity.py
index a6d3895709..075a650ec0 100644
--- a/tests/test_threshold_intensity.py
+++ b/tests/test_threshold_intensity.py
@@ -15,20 +15,21 @@
 from parameterized import parameterized
 
 from monai.transforms import ThresholdIntensity
+from tests.utils import TEST_NDARRAYS, assert_allclose
 
-TEST_CASE_1 = [{"threshold": 5, "above": True, "cval": 0}, (0, 0, 0, 0, 0, 0, 6, 7, 8, 9)]
-
-TEST_CASE_2 = [{"threshold": 5, "above": False, "cval": 0}, (0, 1, 2, 3, 4, 0, 0, 0, 0, 0)]
-
-TEST_CASE_3 = [{"threshold": 5, "above": True, "cval": 5}, (5, 5, 5, 5, 5, 5, 6, 7, 8, 9)]
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append([p, {"threshold": 5, "above": True, "cval": 0}, (0, 0, 0, 0, 0, 0, 6, 7, 8, 9)])
+    TESTS.append([p, {"threshold": 5, "above": False, "cval": 0}, (0, 1, 2, 3, 4, 0, 0, 0, 0, 0)])
+    TESTS.append([p, {"threshold": 5, "above": True, "cval": 5}, (5, 5, 5, 5, 5, 5, 6, 7, 8, 9)])
 
 
 class TestThresholdIntensity(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
-    def test_value(self, input_param, expected_value):
-        test_data = np.arange(10)
+    @parameterized.expand(TESTS)
+    def test_value(self, in_type, input_param, expected_value):
+        test_data = in_type(np.arange(10))
         result = ThresholdIntensity(**input_param)(test_data)
-        np.testing.assert_allclose(result, expected_value)
+        assert_allclose(result, in_type(expected_value))
 
 
 if __name__ == "__main__":
diff --git a/tests/test_threshold_intensityd.py b/tests/test_threshold_intensityd.py
index efcfcfe604..a2a9fdcf2b 100644
--- a/tests/test_threshold_intensityd.py
+++ b/tests/test_threshold_intensityd.py
@@ -15,31 +15,41 @@
 from parameterized import parameterized
 
 from monai.transforms import ThresholdIntensityd
-
-TEST_CASE_1 = [
-    {"keys": ["image", "label", "extra"], "threshold": 5, "above": True, "cval": 0},
-    (0, 0, 0, 0, 0, 0, 6, 7, 8, 9),
-]
-
-TEST_CASE_2 = [
-    {"keys": ["image", "label", "extra"], "threshold": 5, "above": False, "cval": 0},
-    (0, 1, 2, 3, 4, 0, 0, 0, 0, 0),
-]
-
-TEST_CASE_3 = [
-    {"keys": ["image", "label", "extra"], "threshold": 5, "above": True, "cval": 5},
-    (5, 5, 5, 5, 5, 5, 6, 7, 8, 9),
-]
+from tests.utils import TEST_NDARRAYS, assert_allclose
+
+TESTS = []
+for p in TEST_NDARRAYS:
+    TESTS.append(
+        [
+            p,
+            {"keys": ["image", "label", "extra"], "threshold": 5, "above": True, "cval": 0},
+            (0, 0, 0, 0, 0, 0, 6, 7, 8, 9),
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"keys": ["image", "label", "extra"], "threshold": 5, "above": False, "cval": 0},
+            (0, 1, 2, 3, 4, 0, 0, 0, 0, 0),
+        ]
+    )
+    TESTS.append(
+        [
+            p,
+            {"keys": ["image", "label", "extra"], "threshold": 5, "above": True, "cval": 5},
+            (5, 5, 5, 5, 5, 5, 6, 7, 8, 9),
+        ]
+    )
 
 
 class TestThresholdIntensityd(unittest.TestCase):
-    @parameterized.expand([TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
-    def test_value(self, input_param, expected_value):
-        test_data = {"image": np.arange(10), "label": np.arange(10), "extra": np.arange(10)}
+    @parameterized.expand(TESTS)
+    def test_value(self, in_type, input_param, expected_value):
+        test_data = {"image": in_type(np.arange(10)), "label": in_type(np.arange(10)), "extra": in_type(np.arange(10))}
         result = ThresholdIntensityd(**input_param)(test_data)
-        np.testing.assert_allclose(result["image"], expected_value)
-        np.testing.assert_allclose(result["label"], expected_value)
-        np.testing.assert_allclose(result["extra"], expected_value)
+        assert_allclose(result["image"], in_type(expected_value))
+        assert_allclose(result["label"], in_type(expected_value))
+        assert_allclose(result["extra"], in_type(expected_value))
 
 
 if __name__ == "__main__":
diff --git a/tests/test_to_cupy.py b/tests/test_to_cupy.py
index 8b00e12539..0fd9607339 100644
--- a/tests/test_to_cupy.py
+++ b/tests/test_to_cupy.py
@@ -22,49 +22,81 @@
 cp, has_cp = optional_import("cupy")
 
 
+@skipUnless(has_cp, "CuPy is required.")
 class TestToCupy(unittest.TestCase):
-    @skipUnless(has_cp, "CuPy is required.")
     def test_cupy_input(self):
-        test_data = cp.array([[1, 2], [3, 4]])
+        test_data = cp.array([[1, 2], [3, 4]], dtype=cp.float32)
         test_data = cp.rot90(test_data)
         self.assertFalse(test_data.flags["C_CONTIGUOUS"])
         result = ToCupy()(test_data)
+        self.assertTrue(result.dtype == cp.float32)
+        self.assertTrue(isinstance(result, cp.ndarray))
+        self.assertTrue(result.flags["C_CONTIGUOUS"])
+        cp.testing.assert_allclose(result, test_data)
+
+    def test_cupy_input_dtype(self):
+        test_data = cp.array([[1, 2], [3, 4]], dtype=cp.float32)
+        test_data = cp.rot90(test_data)
+        self.assertFalse(test_data.flags["C_CONTIGUOUS"])
+        result = ToCupy(cp.uint8)(test_data)
+        self.assertTrue(result.dtype == cp.uint8)
         self.assertTrue(isinstance(result, cp.ndarray))
         self.assertTrue(result.flags["C_CONTIGUOUS"])
         cp.testing.assert_allclose(result, test_data)
 
-    @skipUnless(has_cp, "CuPy is required.")
     def test_numpy_input(self):
-        test_data = np.array([[1, 2], [3, 4]])
+        test_data = np.array([[1, 2], [3, 4]], dtype=np.float32)
         test_data = np.rot90(test_data)
         self.assertFalse(test_data.flags["C_CONTIGUOUS"])
         result = ToCupy()(test_data)
+        self.assertTrue(result.dtype == cp.float32)
+        self.assertTrue(isinstance(result, cp.ndarray))
+        self.assertTrue(result.flags["C_CONTIGUOUS"])
+        cp.testing.assert_allclose(result, test_data)
+
+    def test_numpy_input_dtype(self):
+        test_data = np.array([[1, 2], [3, 4]], dtype=np.float32)
+        test_data = np.rot90(test_data)
+        self.assertFalse(test_data.flags["C_CONTIGUOUS"])
+        result = ToCupy(np.uint8)(test_data)
+        self.assertTrue(result.dtype == cp.uint8)
         self.assertTrue(isinstance(result, cp.ndarray))
         self.assertTrue(result.flags["C_CONTIGUOUS"])
         cp.testing.assert_allclose(result, test_data)
 
-    @skipUnless(has_cp, "CuPy is required.")
     def test_tensor_input(self):
-        test_data = torch.tensor([[1, 2], [3, 4]])
+        test_data = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
         test_data = test_data.rot90()
         self.assertFalse(test_data.is_contiguous())
         result = ToCupy()(test_data)
+        self.assertTrue(result.dtype == cp.float32)
         self.assertTrue(isinstance(result, cp.ndarray))
         self.assertTrue(result.flags["C_CONTIGUOUS"])
-        cp.testing.assert_allclose(result, test_data.numpy())
+        cp.testing.assert_allclose(result, test_data)
 
-    @skipUnless(has_cp, "CuPy is required.")
     @skip_if_no_cuda
     def test_tensor_cuda_input(self):
-        test_data = torch.tensor([[1, 2], [3, 4]]).cuda()
+        test_data = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32).cuda()
         test_data = test_data.rot90()
         self.assertFalse(test_data.is_contiguous())
         result = ToCupy()(test_data)
+        self.assertTrue(result.dtype == cp.float32)
         self.assertTrue(isinstance(result, cp.ndarray))
         self.assertTrue(result.flags["C_CONTIGUOUS"])
-        cp.testing.assert_allclose(result, test_data.cpu().numpy())
+        cp.testing.assert_allclose(result, test_data)
+
+    @skip_if_no_cuda
+    def test_tensor_cuda_input_dtype(self):
+        test_data = torch.tensor([[1, 2], [3, 4]], dtype=torch.uint8).cuda()
+        test_data = test_data.rot90()
+        self.assertFalse(test_data.is_contiguous())
+
+        result = ToCupy(dtype="float32")(test_data)
+        self.assertTrue(result.dtype == cp.float32)
+        self.assertTrue(isinstance(result, cp.ndarray))
+        self.assertTrue(result.flags["C_CONTIGUOUS"])
+        cp.testing.assert_allclose(result, test_data)
 
-    @skipUnless(has_cp, "CuPy is required.")
     def test_list_tuple(self):
         test_data = [[1, 2], [3, 4]]
         result = ToCupy()(test_data)
diff --git a/tests/test_to_numpy.py b/tests/test_to_numpy.py
index b48727c01d..c7631540b8 100644
--- a/tests/test_to_numpy.py
+++ b/tests/test_to_numpy.py
@@ -31,16 +31,17 @@ def test_cupy_input(self):
         result = ToNumpy()(test_data)
         self.assertTrue(isinstance(result, np.ndarray))
         self.assertTrue(result.flags["C_CONTIGUOUS"])
-        assert_allclose(result, test_data.get())
+        assert_allclose(result, test_data.get(), type_test=False)
 
     def test_numpy_input(self):
         test_data = np.array([[1, 2], [3, 4]])
         test_data = np.rot90(test_data)
         self.assertFalse(test_data.flags["C_CONTIGUOUS"])
-        result = ToNumpy()(test_data)
+        result = ToNumpy(dtype="float32")(test_data)
         self.assertTrue(isinstance(result, np.ndarray))
+        self.assertTrue(result.dtype == np.float32)
         self.assertTrue(result.flags["C_CONTIGUOUS"])
-        assert_allclose(result, test_data)
+        assert_allclose(result, test_data, type_test=False)
 
     def test_tensor_input(self):
         test_data = torch.tensor([[1, 2], [3, 4]])
@@ -49,7 +50,7 @@ def test_tensor_input(self):
         result = ToNumpy()(test_data)
         self.assertTrue(isinstance(result, np.ndarray))
         self.assertTrue(result.flags["C_CONTIGUOUS"])
-        assert_allclose(result, test_data)
+        assert_allclose(result, test_data, type_test=False)
 
     @skip_if_no_cuda
     def test_tensor_cuda_input(self):
@@ -59,21 +60,21 @@ def test_tensor_cuda_input(self):
         result = ToNumpy()(test_data)
         self.assertTrue(isinstance(result, np.ndarray))
         self.assertTrue(result.flags["C_CONTIGUOUS"])
-        assert_allclose(result, test_data)
+        assert_allclose(result, test_data, type_test=False)
 
     def test_list_tuple(self):
         test_data = [[1, 2], [3, 4]]
         result = ToNumpy()(test_data)
-        assert_allclose(result, np.asarray(test_data))
+        assert_allclose(result, np.asarray(test_data), type_test=False)
         test_data = ((1, 2), (3, 4))
         result = ToNumpy()(test_data)
-        assert_allclose(result, np.asarray(test_data))
+        assert_allclose(result, np.asarray(test_data), type_test=False)
 
     def test_single_value(self):
         for test_data in [5, np.array(5), torch.tensor(5)]:
             result = ToNumpy()(test_data)
             self.assertTrue(isinstance(result, np.ndarray))
-            assert_allclose(result, np.asarray(test_data))
+            assert_allclose(result, np.asarray(test_data), type_test=False)
             self.assertEqual(result.ndim, 0)
 
 
diff --git a/tests/test_to_numpyd.py b/tests/test_to_numpyd.py
index 5acaef39c7..0b0b032ef2 100644
--- a/tests/test_to_numpyd.py
+++ b/tests/test_to_numpyd.py
@@ -31,7 +31,7 @@ def test_cupy_input(self):
         result = ToNumpyd(keys="img")({"img": test_data})["img"]
         self.assertTrue(isinstance(result, np.ndarray))
         self.assertTrue(result.flags["C_CONTIGUOUS"])
-        assert_allclose(result, test_data.get())
+        assert_allclose(result, test_data.get(), type_test=False)
 
     def test_numpy_input(self):
         test_data = np.array([[1, 2], [3, 4]])
@@ -40,7 +40,7 @@ def test_numpy_input(self):
         result = ToNumpyd(keys="img")({"img": test_data})["img"]
         self.assertTrue(isinstance(result, np.ndarray))
         self.assertTrue(result.flags["C_CONTIGUOUS"])
-        assert_allclose(result, test_data)
+        assert_allclose(result, test_data, type_test=False)
 
     def test_tensor_input(self):
         test_data = torch.tensor([[1, 2], [3, 4]])
@@ -49,7 +49,7 @@ def test_tensor_input(self):
         result = ToNumpyd(keys="img")({"img": test_data})["img"]
         self.assertTrue(isinstance(result, np.ndarray))
         self.assertTrue(result.flags["C_CONTIGUOUS"])
-        assert_allclose(result, test_data)
+        assert_allclose(result, test_data, type_test=False)
 
     @skip_if_no_cuda
     def test_tensor_cuda_input(self):
@@ -59,7 +59,7 @@ def test_tensor_cuda_input(self):
         result = ToNumpyd(keys="img")({"img": test_data})["img"]
         self.assertTrue(isinstance(result, np.ndarray))
         self.assertTrue(result.flags["C_CONTIGUOUS"])
-        assert_allclose(result, test_data)
+        assert_allclose(result, test_data, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_to_pil.py b/tests/test_to_pil.py
index 5690645dd8..b4581053c0 100644
--- a/tests/test_to_pil.py
+++ b/tests/test_to_pil.py
@@ -43,7 +43,7 @@ class TestToPIL(unittest.TestCase):
     def test_value(self, test_data):
         result = ToPIL()(test_data)
         self.assertTrue(isinstance(result, PILImageImage))
-        assert_allclose(np.array(result), test_data)
+        assert_allclose(np.array(result), test_data, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_to_pild.py b/tests/test_to_pild.py
index 3a15b1e507..3b83fa5258 100644
--- a/tests/test_to_pild.py
+++ b/tests/test_to_pild.py
@@ -30,9 +30,7 @@
     PILImageImage, _ = optional_import("PIL.Image", name="Image")
 
 im = [[1.0, 2.0], [3.0, 4.0]]
-TESTS = []
-for p in TEST_NDARRAYS:
-    TESTS.append([{"keys": "image"}, {"image": p(im)}])
+TESTS = [[{"keys": "image"}, {"image": p(im)}] for p in TEST_NDARRAYS]
 if has_pil:
     TESTS.append([{"keys": "image"}, {"image": pil_image_fromarray(np.array(im))}])
 
@@ -43,7 +41,7 @@ class TestToPIL(unittest.TestCase):
     def test_values(self, input_param, test_data):
         result = ToPILd(**input_param)(test_data)[input_param["keys"]]
         self.assertTrue(isinstance(result, PILImageImage))
-        assert_allclose(np.array(result), test_data[input_param["keys"]])
+        assert_allclose(np.array(result), test_data[input_param["keys"]], type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_to_tensor.py b/tests/test_to_tensor.py
index 6ac06983f6..b065595e89 100644
--- a/tests/test_to_tensor.py
+++ b/tests/test_to_tensor.py
@@ -11,10 +11,13 @@
 
 import unittest
 
+import torch
 from parameterized import parameterized
 
 from monai.transforms import ToTensor
-from tests.utils import TEST_NDARRAYS, assert_allclose
+from tests.utils import TEST_NDARRAYS, assert_allclose, optional_import
+
+cp, has_cp = optional_import("cupy")
 
 im = [[1, 2], [3, 4]]
 
@@ -32,16 +35,26 @@
 class TestToTensor(unittest.TestCase):
     @parameterized.expand(TESTS)
     def test_array_input(self, test_data, expected_shape):
-        result = ToTensor()(test_data)
-        assert_allclose(result, test_data)
+        result = ToTensor(dtype=torch.float32, device="cpu")(test_data)
+        self.assertTrue(isinstance(result, torch.Tensor))
+        assert_allclose(result, test_data, type_test=False)
         self.assertTupleEqual(result.shape, expected_shape)
 
     @parameterized.expand(TESTS_SINGLE)
     def test_single_input(self, test_data):
         result = ToTensor()(test_data)
-        assert_allclose(result, test_data)
+        self.assertTrue(isinstance(result, torch.Tensor))
+        assert_allclose(result, test_data, type_test=False)
         self.assertEqual(result.ndim, 0)
 
+    @unittest.skipUnless(has_cp, "CuPy is required.")
+    def test_cupy(self):
+        test_data = [[1, 2], [3, 4]]
+        cupy_array = cp.ascontiguousarray(cp.asarray(test_data))
+        result = ToTensor()(cupy_array)
+        self.assertTrue(isinstance(result, torch.Tensor))
+        assert_allclose(result, test_data, type_test=False)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_transchex.py b/tests/test_transchex.py
new file mode 100644
index 0000000000..716d3cc52e
--- /dev/null
+++ b/tests/test_transchex.py
@@ -0,0 +1,80 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import torch
+from parameterized import parameterized
+
+from monai.networks import eval_mode
+from monai.networks.nets.transchex import Transchex
+
+TEST_CASE_TRANSCHEX = []
+for drop_out in [0.4]:
+    for in_channels in [3]:
+        for img_size in [224]:
+            for patch_size in [16, 32]:
+                for num_language_layers in [2]:
+                    for num_vision_layers in [4]:
+                        for num_mixed_layers in [3]:
+                            for num_classes in [8]:
+                                test_case = [
+                                    {
+                                        "in_channels": in_channels,
+                                        "img_size": (img_size,) * 2,
+                                        "patch_size": (patch_size,) * 2,
+                                        "num_vision_layers": num_vision_layers,
+                                        "num_mixed_layers": num_mixed_layers,
+                                        "num_language_layers": num_language_layers,
+                                        "num_classes": num_classes,
+                                        "drop_out": drop_out,
+                                    },
+                                    (2, num_classes),  # type: ignore
+                                ]
+                                TEST_CASE_TRANSCHEX.append(test_case)
+
+
+class TestPatchEmbeddingBlock(unittest.TestCase):
+    @parameterized.expand(TEST_CASE_TRANSCHEX)
+    def test_shape(self, input_param, expected_shape):
+        net = Transchex(**input_param)
+        with eval_mode(net):
+            result = net(torch.randint(2, (2, 512)), torch.randint(2, (2, 512)), torch.randn((2, 3, 224, 224)))
+            self.assertEqual(result.shape, expected_shape)
+
+    def test_ill_arg(self):
+        with self.assertRaises(ValueError):
+            Transchex(
+                in_channels=3,
+                img_size=(128, 128),
+                patch_size=(16, 16),
+                num_language_layers=2,
+                num_mixed_layers=4,
+                num_vision_layers=2,
+                num_classes=2,
+                drop_out=5.0,
+            )
+
+        with self.assertRaises(ValueError):
+            Transchex(
+                in_channels=1,
+                img_size=(97, 97),
+                patch_size=(16, 16),
+                num_language_layers=6,
+                num_mixed_layers=6,
+                num_vision_layers=8,
+                num_classes=8,
+                drop_out=0.4,
+            )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_transpose.py b/tests/test_transpose.py
index 10882c9dd8..16cca49e1c 100644
--- a/tests/test_transpose.py
+++ b/tests/test_transpose.py
@@ -42,7 +42,7 @@ def test_transpose(self, im, indices):
         if isinstance(im, torch.Tensor):
             im = im.cpu().numpy()
         out2 = np.transpose(im, indices)
-        assert_allclose(out1, out2)
+        assert_allclose(out1, out2, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_transposed.py b/tests/test_transposed.py
index 88ecd0c872..2f9558b74e 100644
--- a/tests/test_transposed.py
+++ b/tests/test_transposed.py
@@ -57,13 +57,13 @@ def test_transpose(self, im, indices):
         if isinstance(im, torch.Tensor):
             im = im.cpu().numpy()
         out_gt = np.transpose(im, indices)
-        assert_allclose(out_im1, out_gt)
-        assert_allclose(out_im2, out_gt)
+        assert_allclose(out_im1, out_gt, type_test=False)
+        assert_allclose(out_im2, out_gt, type_test=False)
 
         # test inverse
         fwd_inv_data = tr.inverse(out_data)
         for i, j in zip(data.values(), fwd_inv_data.values()):
-            assert_allclose(i, j)
+            assert_allclose(i, j, type_test=False)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_unet.py b/tests/test_unet.py
index 4091c4e9d7..94077dad14 100644
--- a/tests/test_unet.py
+++ b/tests/test_unet.py
@@ -170,13 +170,15 @@ def test_shape(self, input_param, input_shape, expected_shape):
             self.assertEqual(result.shape, expected_shape)
 
     def test_script(self):
-        net = UNet(dimensions=2, in_channels=1, out_channels=3, channels=(16, 32, 64), strides=(2, 2), num_res_units=0)
+        net = UNet(
+            spatial_dims=2, in_channels=1, out_channels=3, channels=(16, 32, 64), strides=(2, 2), num_res_units=0
+        )
         test_data = torch.randn(16, 1, 32, 32)
         test_script_save(net, test_data)
 
     def test_script_without_running_stats(self):
         net = UNet(
-            dimensions=2,
+            spatial_dims=2,
             in_channels=1,
             out_channels=3,
             channels=(16, 32, 64),
@@ -189,7 +191,7 @@ def test_script_without_running_stats(self):
 
     def test_ill_input_shape(self):
         net = UNet(
-            dimensions=2,
+            spatial_dims=2,
             in_channels=1,
             out_channels=3,
             channels=(16, 32, 64),
diff --git a/tests/test_utils_pytorch_numpy_unification.py b/tests/test_utils_pytorch_numpy_unification.py
new file mode 100644
index 0000000000..c8e0a35c92
--- /dev/null
+++ b/tests/test_utils_pytorch_numpy_unification.py
@@ -0,0 +1,46 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+import torch
+
+from monai.transforms.utils_pytorch_numpy_unification import percentile
+from tests.utils import TEST_NDARRAYS, assert_allclose, set_determinism
+
+
+class TestPytorchNumpyUnification(unittest.TestCase):
+    def setUp(self) -> None:
+        set_determinism(0)
+
+    def test_percentile(self):
+        for size in (1, 100):
+            q = np.random.randint(0, 100, size=size)
+            results = []
+            for p in TEST_NDARRAYS:
+                arr = p(np.arange(100 * 101).reshape(1, 100, 101).astype(np.float32))
+                results.append(percentile(arr, q))
+                # pre torch 1.7, no `quantile`. Our own method doesn't interpolate,
+                # so we can only be accurate to 0.5
+                atol = 0.5 if not hasattr(torch, "quantile") else 1e-4
+                assert_allclose(results[0], results[-1], type_test=False, atol=atol)
+
+    def test_fails(self):
+        for p in TEST_NDARRAYS:
+            for q in (-1, 101):
+                arr = p(np.arange(100 * 101).reshape(1, 100, 101).astype(np.float32))
+                with self.assertRaises(ValueError):
+                    percentile(arr, q)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_version_leq.py b/tests/test_version_leq.py
index a1913069d3..042a561a90 100644
--- a/tests/test_version_leq.py
+++ b/tests/test_version_leq.py
@@ -67,6 +67,9 @@ def _pairwise(iterable):
     ("0post1", "0.4post1"),
     ("2.1.0-rc1", "2.1.0"),
     ("2.1dev", "2.1a0"),
+    (1.6, "1.6.0"),
+    ("1.6.0", 1.6),
+    (1.6, 1.7),
 ) + tuple(_pairwise(reversed(torture.split())))
 
 
diff --git a/tests/test_vis_gradcam.py b/tests/test_vis_gradcam.py
index eebf32d70b..ecd62badcc 100644
--- a/tests/test_vis_gradcam.py
+++ b/tests/test_vis_gradcam.py
@@ -88,6 +88,16 @@ def test_shape(self, input_data, expected_shape):
         result2 = cam(x=image, layer_idx=-1, class_idx=model(image).max(1)[-1].cpu())
         torch.testing.assert_allclose(result, result2)
 
+    def test_ill(self):
+        model = DenseNet121(spatial_dims=2, in_channels=1, out_channels=3)
+        for name, x in model.named_parameters():
+            if "features" in name:
+                x.requires_grad = False
+        cam = GradCAM(nn_module=model, target_layers="class_layers.relu")
+        image = torch.rand((2, 1, 48, 64))
+        with self.assertRaises(RuntimeError):
+            cam(x=image)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_zoom.py b/tests/test_zoom.py
index e6710ede29..9411988a7e 100644
--- a/tests/test_zoom.py
+++ b/tests/test_zoom.py
@@ -12,11 +12,12 @@
 import unittest
 
 import numpy as np
+import torch
 from parameterized import parameterized
 from scipy.ndimage import zoom as zoom_scipy
 
 from monai.transforms import Zoom
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 VALID_CASES = [(1.5, "nearest"), (1.5, "nearest"), (0.8, "bilinear"), (0.8, "area")]
 
@@ -26,38 +27,42 @@
 class TestZoom(NumpyImageTestCase2D):
     @parameterized.expand(VALID_CASES)
     def test_correct_results(self, zoom, mode):
-        zoom_fn = Zoom(zoom=zoom, mode=mode, keep_size=False)
-        zoomed = zoom_fn(self.imt[0])
-        _order = 0
-        if mode.endswith("linear"):
-            _order = 1
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(zoom_scipy(channel, zoom=zoom, mode="nearest", order=_order, prefilter=False))
-        expected = np.stack(expected).astype(np.float32)
-        np.testing.assert_allclose(zoomed, expected, atol=1.0)
+        for p in TEST_NDARRAYS:
+            zoom_fn = Zoom(zoom=zoom, mode=mode, keep_size=False)
+            zoomed = zoom_fn(p(self.imt[0]))
+            _order = 0
+            if mode.endswith("linear"):
+                _order = 1
+            expected = []
+            for channel in self.imt[0]:
+                expected.append(zoom_scipy(channel, zoom=zoom, mode="nearest", order=_order, prefilter=False))
+            expected = np.stack(expected).astype(np.float32)
+            assert_allclose(zoomed, p(expected), atol=1.0)
 
     def test_keep_size(self):
-        zoom_fn = Zoom(zoom=[0.6, 0.6], keep_size=True, align_corners=True, padding_mode="constant", constant_values=2)
-        zoomed = zoom_fn(self.imt[0], mode="bilinear")
-        np.testing.assert_allclose(zoomed.shape, self.imt.shape[1:])
+        for p in TEST_NDARRAYS:
+            zoom_fn = Zoom(zoom=[0.6, 0.6], keep_size=True, align_corners=True)
+            zoomed = zoom_fn(p(self.imt[0]), mode="bilinear")
+            assert_allclose(zoomed.shape, self.imt.shape[1:])
 
-        zoom_fn = Zoom(zoom=[1.3, 1.3], keep_size=True)
-        zoomed = zoom_fn(self.imt[0])
-        np.testing.assert_allclose(zoomed.shape, self.imt.shape[1:])
+            zoom_fn = Zoom(zoom=[1.3, 1.3], keep_size=True)
+            zoomed = zoom_fn(p(self.imt[0]))
+            assert_allclose(zoomed.shape, self.imt.shape[1:])
 
     @parameterized.expand(INVALID_CASES)
     def test_invalid_inputs(self, zoom, mode, raises):
-        with self.assertRaises(raises):
-            zoom_fn = Zoom(zoom=zoom, mode=mode)
-            zoom_fn(self.imt[0])
+        for p in TEST_NDARRAYS:
+            with self.assertRaises(raises):
+                zoom_fn = Zoom(zoom=zoom, mode=mode)
+                zoom_fn(p(self.imt[0]))
 
     def test_padding_mode(self):
-        zoom_fn = Zoom(zoom=0.5, mode="nearest", padding_mode="constant", keep_size=True)
-        test_data = np.array([[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]]])
-        zoomed = zoom_fn(test_data)
-        expected = np.array([[[0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 1.0, 0.0], [0.0, 1.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]]])
-        np.testing.assert_allclose(zoomed, expected)
+        for p in TEST_NDARRAYS:
+            zoom_fn = Zoom(zoom=0.5, mode="nearest", padding_mode="constant", keep_size=True)
+            test_data = p([[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]]])
+            zoomed = zoom_fn(test_data)
+            expected = p([[[0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 1.0, 0.0], [0.0, 1.0, 1.0, 0.0], [0.0, 0.0, 0.0, 0.0]]])
+            torch.testing.assert_allclose(zoomed, expected)
 
 
 if __name__ == "__main__":
diff --git a/tests/test_zoomd.py b/tests/test_zoomd.py
index 1a1a905d80..6231978ca7 100644
--- a/tests/test_zoomd.py
+++ b/tests/test_zoomd.py
@@ -16,7 +16,7 @@
 from scipy.ndimage import zoom as zoom_scipy
 
 from monai.transforms import Zoomd
-from tests.utils import NumpyImageTestCase2D
+from tests.utils import TEST_NDARRAYS, NumpyImageTestCase2D, assert_allclose
 
 VALID_CASES = [(1.5, "nearest", False), (0.3, "bilinear", False), (0.8, "bilinear", False)]
 
@@ -27,38 +27,37 @@ class TestZoomd(NumpyImageTestCase2D):
     @parameterized.expand(VALID_CASES)
     def test_correct_results(self, zoom, mode, keep_size):
         key = "img"
-        zoom_fn = Zoomd(
-            key,
-            zoom=zoom,
-            mode=mode,
-            keep_size=keep_size,
-        )
-        zoomed = zoom_fn({key: self.imt[0]})
-        _order = 0
-        if mode.endswith("linear"):
-            _order = 1
-        expected = []
-        for channel in self.imt[0]:
-            expected.append(zoom_scipy(channel, zoom=zoom, mode="nearest", order=_order, prefilter=False))
-        expected = np.stack(expected).astype(np.float32)
-        np.testing.assert_allclose(expected, zoomed[key], atol=1.0)
+        zoom_fn = Zoomd(key, zoom=zoom, mode=mode, keep_size=keep_size)
+        for p in TEST_NDARRAYS:
+            zoomed = zoom_fn({key: p(self.imt[0])})
+            _order = 0
+            if mode.endswith("linear"):
+                _order = 1
+            expected = [
+                zoom_scipy(channel, zoom=zoom, mode="nearest", order=_order, prefilter=False) for channel in self.imt[0]
+            ]
+
+            expected = np.stack(expected).astype(np.float32)
+            assert_allclose(zoomed[key], p(expected), atol=1.0)
 
     def test_keep_size(self):
         key = "img"
         zoom_fn = Zoomd(key, zoom=0.6, keep_size=True, padding_mode="constant", constant_values=2)
-        zoomed = zoom_fn({key: self.imt[0]})
-        self.assertTrue(np.array_equal(zoomed[key].shape, self.imt.shape[1:]))
+        for p in TEST_NDARRAYS:
+            zoomed = zoom_fn({key: p(self.imt[0])})
+            np.testing.assert_array_equal(zoomed[key].shape, self.imt.shape[1:])
 
-        zoom_fn = Zoomd(key, zoom=1.3, keep_size=True)
-        zoomed = zoom_fn({key: self.imt[0]})
-        self.assertTrue(np.array_equal(zoomed[key].shape, self.imt.shape[1:]))
+            zoom_fn = Zoomd(key, zoom=1.3, keep_size=True)
+            zoomed = zoom_fn({key: self.imt[0]})
+            self.assertTrue(np.array_equal(zoomed[key].shape, self.imt.shape[1:]))
 
     @parameterized.expand(INVALID_CASES)
     def test_invalid_inputs(self, _, zoom, mode, raises):
         key = "img"
-        with self.assertRaises(raises):
-            zoom_fn = Zoomd(key, zoom=zoom, mode=mode)
-            zoom_fn({key: self.imt[0]})
+        for p in TEST_NDARRAYS:
+            with self.assertRaises(raises):
+                zoom_fn = Zoomd(key, zoom=zoom, mode=mode)
+                zoom_fn({key: p(self.imt[0])})
 
 
 if __name__ == "__main__":
diff --git a/tests/testing_data/cpp_resample_answers.py b/tests/testing_data/cpp_resample_answers.py
index 51ac6ccda9..67af152059 100644
--- a/tests/testing_data/cpp_resample_answers.py
+++ b/tests/testing_data/cpp_resample_answers.py
@@ -23,7 +23,7 @@ def _read_testing_data_answers(fname: Optional[str] = None, delimiter=",") -> Li
     pwd = os.path.dirname(os.path.abspath(__file__))
     filename = os.path.join(pwd, fname)
     if not os.path.isfile(filename):
-        warnings.warn("test data {} not found.".format(filename))
+        warnings.warn(f"test data {filename} not found.")
         return answers
     with open(filename) as f:
         res_reader = csv.reader(f, delimiter=delimiter)
diff --git a/tests/utils.py b/tests/utils.py
index 1375cd2d72..a3d52ae2cb 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -25,7 +25,7 @@
 from io import BytesIO
 from subprocess import PIPE, Popen
 from typing import Callable, Optional, Tuple
-from urllib.error import ContentTooShortError, HTTPError, URLError
+from urllib.error import HTTPError, URLError
 
 import numpy as np
 import torch
@@ -38,10 +38,12 @@
 from monai.utils import ensure_tuple, optional_import, set_determinism
 from monai.utils.misc import is_module_ver_at_least
 from monai.utils.module import version_leq
+from monai.utils.type_conversion import convert_data_type
 
 nib, _ = optional_import("nibabel")
 
 quick_test_var = "QUICKTEST"
+_tf32_enabled = None
 
 
 def clone(data: NdarrayTensor) -> NdarrayTensor:
@@ -57,31 +59,79 @@ def clone(data: NdarrayTensor) -> NdarrayTensor:
     return copy.deepcopy(data)
 
 
-def assert_allclose(a: NdarrayOrTensor, b: NdarrayOrTensor, *args, **kwargs):
+def assert_allclose(
+    actual: NdarrayOrTensor,
+    desired: NdarrayOrTensor,
+    type_test: bool = True,
+    device_test: bool = False,
+    *args,
+    **kwargs,
+):
     """
-    Assert that all values of two data objects are close.
+    Assert that types and all values of two data objects are close.
 
     Args:
-        a (NdarrayOrTensor): Pytorch Tensor or numpy array for comparison
-        b (NdarrayOrTensor): Pytorch Tensor or numpy array to compare against
+        actual: Pytorch Tensor or numpy array for comparison.
+        desired: Pytorch Tensor or numpy array to compare against.
+        type_test: whether to test that `actual` and `desired` are both numpy arrays or torch tensors.
+        device_test: whether to test the device property.
+        args: extra arguments to pass on to `np.testing.assert_allclose`.
+        kwargs: extra arguments to pass on to `np.testing.assert_allclose`.
+
+
     """
-    a = a.cpu() if isinstance(a, torch.Tensor) else a
-    b = b.cpu() if isinstance(b, torch.Tensor) else b
-    np.testing.assert_allclose(a, b, *args, **kwargs)
+    if type_test:
+        # check both actual and desired are of the same type
+        np.testing.assert_equal(isinstance(actual, np.ndarray), isinstance(desired, np.ndarray), "numpy type")
+        np.testing.assert_equal(isinstance(actual, torch.Tensor), isinstance(desired, torch.Tensor), "torch type")
+
+    if isinstance(desired, torch.Tensor) or isinstance(actual, torch.Tensor):
+        if device_test:
+            np.testing.assert_equal(str(actual.device), str(desired.device), "torch device check")  # type: ignore
+        actual = actual.cpu().numpy() if isinstance(actual, torch.Tensor) else actual
+        desired = desired.cpu().numpy() if isinstance(desired, torch.Tensor) else desired
+    np.testing.assert_allclose(actual, desired, *args, **kwargs)
 
 
 def test_pretrained_networks(network, input_param, device):
     try:
-        net = network(**input_param).to(device)
-    except (URLError, HTTPError, ContentTooShortError) as e:
-        raise unittest.SkipTest(e)
-    return net
+        return network(**input_param).to(device)
+    except (URLError, HTTPError) as e:
+        raise unittest.SkipTest(e) from e
 
 
 def test_is_quick():
     return os.environ.get(quick_test_var, "").lower() == "true"
 
 
+def is_tf32_env():
+    """
+    The environment variable NVIDIA_TF32_OVERRIDE=0 will override any defaults
+    or programmatic configuration of NVIDIA libraries, and consequently,
+    cuBLAS will not accelerate FP32 computations with TF32 tensor cores.
+    """
+    global _tf32_enabled
+    if _tf32_enabled is None:
+        _tf32_enabled = False
+        if (
+            torch.cuda.is_available()
+            and not version_leq(f"{torch.version.cuda}", "10.100")  # at least 11.0
+            and os.environ.get("NVIDIA_TF32_OVERRIDE", "1") != "0"
+            and torch.cuda.device_count() > 0
+        ):
+            try:
+                # with TF32 enabled, the speed is ~8x faster, but the precision has ~2 digits less in the result
+                g_gpu = torch.Generator(device="cuda")
+                g_gpu.manual_seed(2147483647)
+                a_full = torch.randn(1024, 1024, dtype=torch.double, device="cuda", generator=g_gpu)
+                b_full = torch.randn(1024, 1024, dtype=torch.double, device="cuda", generator=g_gpu)
+                _tf32_enabled = (a_full.float() @ b_full.float() - a_full @ b_full).abs().max().item() > 0.001  # 0.1713
+            except BaseException:
+                pass
+        print(f"tf32 enabled: {_tf32_enabled}")
+    return _tf32_enabled
+
+
 def skip_if_quick(obj):
     """
     Skip the unit tests if environment variable `quick_test_var=true`.
@@ -166,11 +216,15 @@ def __call__(self, obj):
         )(obj)
 
 
-def make_nifti_image(array, affine=None):
+def make_nifti_image(array: NdarrayOrTensor, affine=None):
     """
     Create a temporary nifti image on the disk and return the image name.
     User is responsible for deleting the temporary file when done with it.
     """
+    if isinstance(array, torch.Tensor):
+        array, *_ = convert_data_type(array, np.ndarray)
+    if isinstance(affine, torch.Tensor):
+        affine, *_ = convert_data_type(affine, np.ndarray)
     if affine is None:
         affine = np.eye(4)
     test_image = nib.Nifti1Image(array, affine)