Run standalone tests in batches #13673

Merged (17 commits) on Jul 18, 2022
39 changes: 32 additions & 7 deletions tests/tests_pytorch/run_standalone_tests.sh
@@ -18,7 +18,7 @@ set -e
 # this environment variable allows special tests to run
 export PL_RUN_STANDALONE_TESTS=1
 # python arguments
-defaults='-m coverage run --source pytorch_lightning --append -m pytest --capture=no'
+defaults='-m coverage run --source pytorch_lightning --append -m pytest --no-header'
 
 # find tests marked as `@RunIf(standalone=True)`. done manually instead of with pytest because it is faster
 grep_output=$(grep --recursive --word-regexp . --regexp 'standalone=True' --include '*.py')
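
As an aside on the discovery step above: grep is used instead of pytest collection because scanning the sources for the literal marker is much faster than importing and collecting the whole suite. A rough Python equivalent of that discovery pass is sketched below; the function name and the simplified substring match are illustrative, not part of the PR, and the script additionally expands each matching file into individual test parametrizations.

# A minimal sketch of the grep-based discovery, assuming only that standalone
# tests are marked with the literal text `standalone=True` in the source.
from pathlib import Path

def find_standalone_test_files(root: str = ".") -> list:
    matches = []
    for path in Path(root).rglob("*.py"):
        # substring check stands in for grep --word-regexp 'standalone=True'
        if "standalone=True" in path.read_text(errors="ignore"):
            matches.append(str(path))
    return sorted(matches)
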
@@ -40,22 +40,47 @@ parametrizations_arr=($parametrizations)
 # tests to skip - space separated
 blocklist='profilers/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx utilities/test_warnings.py'
 report=''
+test_batch_size=6
 
+rm -f standalone_test_output.txt  # in case it exists, remove it
+function show_batched_output {
+  if [ -f standalone_test_output.txt ]; then  # if exists
+    cat standalone_test_output.txt
+    rm standalone_test_output.txt
+  fi
+}
+trap show_batched_output EXIT  # show the output on exit
+
 for i in "${!parametrizations_arr[@]}"; do
   parametrization=${parametrizations_arr[$i]}
 
   # check blocklist
   if echo $blocklist | grep -F "${parametrization}"; then
     report+="Skipped\t$parametrization\n"
-    continue
+    # do not continue the loop because we might need to wait for batched jobs
+  else
+    echo "Running $parametrization"
+    # execute the test in the background
+    # redirect to a log file that buffers test output. since the tests will run in the background, we cannot let them
+    # output to std{out,err} because the outputs would be garbled together
+    python ${defaults} "$parametrization" &>> standalone_test_output.txt &
+    # save the PID in an array
+    pids[${i}]=$!
+    # add row to the final report
+    report+="Ran\t$parametrization\n"
   fi
 
-  # run the test
-  echo "Running $parametrization"
-  python ${defaults} "$parametrization"
-
-  report+="Ran\t$parametrization\n"
+  if ((($i + 1) % $test_batch_size == 0)); then
+    # wait for running tests
+    for pid in ${pids[*]}; do wait $pid; done
+    unset pids  # empty the array
+    show_batched_output
+  fi
 done
+# wait for leftover tests
+for pid in ${pids[*]}; do wait $pid; done
+show_batched_output
+echo "Batched mode finished. Continuing with the rest of standalone tests."
 
 if nvcc --version; then
   nvprof --profile-from-start off -o trace_name.prof -- python ${defaults} profilers/test_profiler.py::test_pytorch_profiler_nested_emit_nvtx
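
The new loop implements a small batch scheduler in bash: each test is launched in the background with its output appended to a shared log file (concurrent writers would otherwise garble the console), its PID is recorded, and after every test_batch_size iterations the script waits on the whole batch and flushes the log; the EXIT trap flushes whatever remains if the script dies early. A rough Python sketch of the same pattern follows; run_in_batches, launch, and flush_log are illustrative names under those assumptions, not part of the PR.

# A minimal sketch of the same batching pattern in Python.
import subprocess
import sys
from pathlib import Path

LOG = Path("standalone_test_output.txt")

def launch(test: str) -> subprocess.Popen:
    # append the child's output to the shared log: concurrent tests must not
    # write to the console directly or their outputs would be garbled together
    with LOG.open("ab") as log:
        return subprocess.Popen(
            [sys.executable, "-m", "pytest", test, "--no-header"],
            stdout=log,
            stderr=subprocess.STDOUT,
        )

def flush_log() -> None:
    # counterpart of the script's `show_batched_output`: print, then reset
    if LOG.exists():
        print(LOG.read_text(), end="")
        LOG.unlink()

def run_in_batches(tests, batch_size=6, blocklist=()):
    report, procs = [], []
    for i, test in enumerate(tests):
        if any(blocked in test for blocked in blocklist):
            report.append(("Skipped", test))
        else:
            procs.append(launch(test))
            report.append(("Ran", test))
        # the batch-boundary check runs on every iteration, skipped tests included
        if (i + 1) % batch_size == 0:
            for proc in procs:
                proc.wait()  # block until the whole batch is done
            procs.clear()
            flush_log()
    for proc in procs:
        proc.wait()  # wait for leftover tests from a partial final batch
    flush_log()
    return report

As in the script, the batch-boundary check runs on every iteration, including skipped ones, which is why the original continue statement had to be replaced by an if/else.
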
10 changes: 2 additions & 8 deletions tests/tests_pytorch/strategies/test_deepspeed_strategy.py
@@ -26,7 +26,7 @@
 from torch.utils.data import DataLoader
 from torchmetrics import Accuracy
 
-from pytorch_lightning import LightningDataModule, LightningModule, seed_everything, Trainer
+from pytorch_lightning import LightningDataModule, LightningModule, Trainer
 from pytorch_lightning.callbacks import Callback, LearningRateMonitor, ModelCheckpoint
 from pytorch_lightning.demos.boring_classes import BoringModel, RandomDataset
 from pytorch_lightning.plugins import DeepSpeedPrecisionPlugin
@@ -712,7 +712,6 @@ def test_deepspeed_multigpu_stage_3_manual_optimization(tmpdir, deepspeed_config
 @pytest.mark.parametrize(("accumulate_grad_batches", "automatic_optimization"), [(1, False), (2, True)])
 @RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
 def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization, accumulate_grad_batches):
-    seed_everything(1)
     if automatic_optimization:
         model = ModelParallelClassificationModel()
     else:
@@ -734,9 +733,7 @@ def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization, accumulate_grad_batches):
     trainer.fit(model, datamodule=dm)
 
     results = trainer.test(datamodule=dm)
-    assert results[0]["test_acc"] > 0.7
     saved_results = trainer.test(ckpt_path=ck.best_model_path, datamodule=dm)
-    assert saved_results[0]["test_acc"] > 0.7
     assert saved_results == results
 
     if automatic_optimization:
@@ -752,9 +749,7 @@ def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization, accumulate_grad_batches):
         enable_progress_bar=False,
         enable_model_summary=False,
     )
-
-    results = trainer.test(model, datamodule=dm, ckpt_path=ck.best_model_path)
-    assert results[0]["test_acc"] > 0.7
+    trainer.test(model, datamodule=dm, ckpt_path=ck.best_model_path)
 
 
 @RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
@@ -861,7 +856,6 @@ def on_train_epoch_start(self, trainer: Trainer, pl_module: LightningModule) ->
 @RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
 def test_deepspeed_multigpu_stage_2_accumulated_grad_batches(tmpdir, offload_optimizer):
     """Test to ensure with Stage 2 and multiple GPUs, accumulated grad batches works."""
-    seed_everything(42)
 
     class VerificationCallback(Callback):
         def __init__(self):
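
The test changes follow one pattern: the absolute-threshold assertions (test_acc > 0.7), and the seed_everything calls they presumably depended on, are dropped in favor of a relative check that metrics from the restored best checkpoint equal the metrics from the in-memory model. Below is a minimal self-contained sketch of that pattern, assuming the BoringModel forward and dataloaders from pytorch_lightning.demos behave as in the Lightning demos; the subclass name and test function are illustrative, not the PR's test itself.

# A minimal sketch of the checkpoint-consistency check, not the PR's test.
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.demos.boring_classes import BoringModel

class LoggingBoringModel(BoringModel):
    def test_step(self, batch, batch_idx):
        # log a metric so trainer.test() returns a non-empty result dict
        self.log("test_loss", self(batch).sum())

def test_checkpoint_roundtrip_metrics(tmp_path):
    ck = ModelCheckpoint(dirpath=tmp_path, save_last=True)
    trainer = pl.Trainer(
        default_root_dir=tmp_path,
        max_epochs=1,
        callbacks=[ck],
        enable_progress_bar=False,
        enable_model_summary=False,
    )
    model = LoggingBoringModel()
    trainer.fit(model)

    results = trainer.test(model)  # metrics from the in-memory weights
    saved_results = trainer.test(model, ckpt_path=ck.last_model_path)
    # a lossless save/load round trip must reproduce the exact same metrics
    assert saved_results == results

No fixed seed is required for this style of assertion: whatever the trained weights turn out to be, testing the restored checkpoint must reproduce the in-memory metrics exactly.
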