diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a57a508c6cec..3c12f0f44539a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed `LSFEnvironment` to use `LSB_DJOB_RANKFILE` environment variable instead of `LSB_HOSTS` for determining node rank and main address ([#10825](https://github.com/PyTorchLightning/pytorch-lightning/pull/10825)) +- Disabled sampler replacement when using `IterableDataset` ([#11507](https://github.com/PyTorchLightning/pytorch-lightning/pull/11507)) + + ## [1.5.8] - 2022-01-05 ### Fixed diff --git a/pytorch_lightning/trainer/data_loading.py b/pytorch_lightning/trainer/data_loading.py index 1662c1b3aaa92..fdeddcbca1e50 100644 --- a/pytorch_lightning/trainer/data_loading.py +++ b/pytorch_lightning/trainer/data_loading.py @@ -272,9 +272,13 @@ def _get_dataloader_init_kwargs( # kwargs to re-construct the dataloader dl_kwargs = {k: v for k, v in attrs.items() if k in non_defaults} - dl_kwargs.update( - TrainerDataLoadingMixin._dataloader_init_kwargs_resolve_sampler(dataloader, sampler, mode=mode) - ) + if isinstance(dl_kwargs["dataset"], IterableDataset): + dl_kwargs["batch_sampler"] = None + dl_kwargs["sampler"] = None + else: + dl_kwargs.update( + TrainerDataLoadingMixin._dataloader_init_kwargs_resolve_sampler(dataloader, sampler, mode=mode) + ) required_args = { p.name diff --git a/tests/trainer/test_data_loading.py b/tests/trainer/test_data_loading.py index 9b1e5ca45e655..8f745db4b8400 100644 --- a/tests/trainer/test_data_loading.py +++ b/tests/trainer/test_data_loading.py @@ -20,11 +20,12 @@ from torch.utils.data.sampler import BatchSampler, Sampler, SequentialSampler from pytorch_lightning import Trainer +from pytorch_lightning.trainer.data_loading import TrainerDataLoadingMixin from pytorch_lightning.trainer.states import RunningStage from pytorch_lightning.trainer.supporters import CombinedLoader from pytorch_lightning.utilities.enums import 
DistributedType from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.helpers import BoringModel, RandomDataset +from tests.helpers.boring_model import BoringModel, RandomDataset, RandomIterableDataset from tests.helpers.runif import RunIf @@ -389,3 +390,16 @@ def test_non_sequential_sampler_warning_is_raised_for_eval_dataloader(val_dl): trainer._data_connector.attach_data(model, val_dataloaders=val_dl) with pytest.warns(UserWarning, match="recommended .* turn this off for val/test/predict"): trainer._reset_eval_dataloader(RunningStage.VALIDATING, model) + + +@pytest.mark.parametrize("mode", [RunningStage.TRAINING, RunningStage.PREDICTING, RunningStage.TESTING]) +def test_dataloader_kwargs_replacement_with_iterable_dataset(mode): + """Test that DataLoader kwargs are not replaced when using Iterable Dataset.""" + dataset = RandomIterableDataset(7, 100) + dataloader = DataLoader(dataset, batch_size=32) + dl_kwargs = TrainerDataLoadingMixin._get_dataloader_init_kwargs(dataloader, dataloader.sampler, mode=mode) + assert dl_kwargs["sampler"] is None + assert dl_kwargs["batch_sampler"] is None + assert dl_kwargs["batch_size"] is dataloader.batch_size + assert dl_kwargs["dataset"] is dataloader.dataset + assert dl_kwargs["collate_fn"] is dataloader.collate_fn