From e4fe713148ae01be502f57b9373f60399d1c0fb0 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 4 Dec 2020 19:56:56 +0000 Subject: [PATCH 01/17] check if optimizer support closure --- pytorch_lightning/core/lightning.py | 8 ++- pytorch_lightning/core/optimizer.py | 8 ++- pytorch_lightning/utilities/__init__.py | 1 + tests/core/test_lightning_optimizer.py | 85 ++++++++++++++++++++++++- 4 files changed, 99 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index c33297934eed1..41febc407a00f 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1236,6 +1236,8 @@ def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, model hook don't forget to add the call to it before ``optimizer.zero_grad()`` yourself. """ + support_closure = 'closure' in inspect.signature(optimizer.step).parameters.keys() + if on_tpu and TPU_AVAILABLE: xm.optimizer_step(optimizer, optimizer_args={'closure': optimizer_closure, **kwargs}) @@ -1244,7 +1246,11 @@ def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, self.trainer, optimizer, optimizer_closure) else: - optimizer.step(closure=optimizer_closure, *args, **kwargs) + if support_closure: + optimizer.step(closure=optimizer_closure, *args, **kwargs) + else: + optimizer_closure() + optimizer.step(*args, **kwargs) def optimizer_zero_grad( self, epoch: int, batch_idx: int, optimizer: Optimizer, optimizer_idx: int diff --git a/pytorch_lightning/core/optimizer.py b/pytorch_lightning/core/optimizer.py index f8f6a7b6c0f12..a75378d634812 100644 --- a/pytorch_lightning/core/optimizer.py +++ b/pytorch_lightning/core/optimizer.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import inspect import types from typing import Any, Callable, Optional from weakref import proxy @@ -61,6 +62,7 @@ def __init__(self, self._optimizer = optimizer self._accumulate_grad_batches = accumulate_grad_batches self._use_accumulate_grad_batches_from_trainer = accumulate_grad_batches is None + self._support_closure = 'closure' in inspect.signature(optimizer.step).parameters.keys() def _on_trainer_init(self, trainer): self._trainer = proxy(trainer) @@ -199,7 +201,11 @@ def dis_closure(): else: with trainer.profiler.profile(profiler_name): - optimizer.step(closure=closure, *args, **kwargs) + if self._support_closure: + optimizer.step(closure=closure, *args, **kwargs) + else: + closure() + optimizer.step(*args, **kwargs) # perform zero grad optimizer.zero_grad() diff --git a/pytorch_lightning/utilities/__init__.py b/pytorch_lightning/utilities/__init__.py index 1e2eeea9f456c..85cb799500fbd 100644 --- a/pytorch_lightning/utilities/__init__.py +++ b/pytorch_lightning/utilities/__init__.py @@ -48,6 +48,7 @@ def _module_available(module_path: str) -> bool: OMEGACONF_AVAILABLE = _module_available("omegaconf") HYDRA_AVAILABLE = _module_available("hydra") HOROVOD_AVAILABLE = _module_available("horovod.torch") +BOLT_AVAILABLE = _module_available("pl_bolts") TPU_AVAILABLE = XLADeviceUtils.tpu_device_exists() FAIRSCALE_AVAILABLE = platform.system() != 'Windows' and _module_available('fairscale.nn.data_parallel') diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index a120365237e9f..736b14e5e6360 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -19,9 +19,16 @@ import torch.nn as nn from torch.optim import Adam, Optimizer +import pytorch_lightning as pl from pytorch_lightning import LightningModule, Trainer +from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.core.optimizer import LightningOptimizer -from tests.base.boring_model import BoringModel, RandomDictDataset, RandomDictStringDataset +from pytorch_lightning.utilities import BOLT_AVAILABLE +from tests.base.boring_model import BoringModel, RandomDataset, RandomDictDataset, RandomDictStringDataset + +if BOLT_AVAILABLE: + from pl_bolts.optimizers.lars_scheduling import LARSWrapper + from pl_bolts.optimizers.lr_scheduler import LinearWarmupCosineAnnealingLR def test_lightning_optimizer(tmpdir): @@ -195,3 +202,79 @@ def test_state(tmpdir): assert lightning_dict == optimizer.__dict__ assert optimizer.state_dict() == lightning_optimizer.state_dict() assert optimizer.state == lightning_optimizer.state + + +def test_lightning_optimizer_state(tmpdir): + class CheckpointEveryNSteps(pl.Callback): + """ + Save a checkpoint every N steps, instead of Lightning's default that checkpoints + based on validation loss. + """ + + def __init__( + self, + save_step_frequency, + prefix="latest-Checkpoint", + use_modelcheckpoint_filename=False, + ): + """ + Args: + save_step_frequency: how often to save in steps + prefix: add a prefix to the name, only used if + use_modelcheckpoint_filename=False + use_modelcheckpoint_filename: just use the ModelCheckpoint callback's + default filename, don't use ours. 
+ """ + self.save_step_frequency = save_step_frequency + self.prefix = prefix + self.use_modelcheckpoint_filename = use_modelcheckpoint_filename + + def on_batch_end(self, trainer: pl.Trainer, _): + """ Check if we should save a checkpoint after every train batch """ + global_step = trainer.global_step + if global_step % self.save_step_frequency == 0: + if self.use_modelcheckpoint_filename: + filename = trainer.checkpoint_callback.filename + else: + filename = "{}.ckpt".format(self.prefix) + ckpt_path = os.path.join(trainer.checkpoint_callback.dirpath, filename) + trainer.save_checkpoint(ckpt_path) + + class TestModel(BoringModel): + + def on_train_epoch_start(self) -> None: + print('override any method to prove your bug') + + def configure_optimizers(self): + optimizer = torch.optim.Adam(self.parameters(), lr=0.1) + + optimizer = LARSWrapper(optimizer) + scheduler = LinearWarmupCosineAnnealingLR( + optimizer, + warmup_epochs=1, + max_epochs=20 + ) + return [optimizer], [scheduler] + + train_data = torch.utils.data.DataLoader(RandomDataset(32, 64), batch_size=1) + val_data = torch.utils.data.DataLoader(RandomDataset(32, 64),batch_size=1) + test_data = torch.utils.data.DataLoader(RandomDataset(32, 64),batch_size=1) + + checkpoint_callback = ModelCheckpoint( + monitor='loss', + mode='min', + filepath=tmpdir + ) + + model = TestModel() + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=10, + weights_summary=None, + accelerator='ddp', + log_every_n_steps=1, + gpus=1, + checkpoint_callback=checkpoint_callback, + callbacks=[CheckpointEveryNSteps(1)] + ) + trainer.fit(model, train_data, val_data) From 46712a0fe75301bee285bf1dac73001aad3937d6 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 4 Dec 2020 20:03:36 +0000 Subject: [PATCH 02/17] cleanup test --- tests/core/test_lightning_optimizer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index 736b14e5e6360..122cb5e541f49 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -204,6 +204,8 @@ def test_state(tmpdir): assert optimizer.state == lightning_optimizer.state +@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") +@pytest.mark.skipif(not BOLT_AVAILABLE, reason="Bolt is required for this test") def test_lightning_optimizer_state(tmpdir): class CheckpointEveryNSteps(pl.Callback): """ @@ -271,7 +273,7 @@ def configure_optimizers(self): default_root_dir=tmpdir, max_epochs=10, weights_summary=None, - accelerator='ddp', + accelerator='ddp_spawn', log_every_n_steps=1, gpus=1, checkpoint_callback=checkpoint_callback, From 1c8df24e7b78b2c7f0d76772a95278cbcd341e0d Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 4 Dec 2020 20:51:21 +0000 Subject: [PATCH 03/17] resolve tests --- pytorch_lightning/core/optimizer.py | 1 + tests/core/test_lightning_optimizer.py | 19 ++++++++++--------- .../optimization/test_manual_optimization.py | 16 +++++++++------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/pytorch_lightning/core/optimizer.py b/pytorch_lightning/core/optimizer.py index a75378d634812..af5c35d79c88a 100644 --- a/pytorch_lightning/core/optimizer.py +++ b/pytorch_lightning/core/optimizer.py @@ -201,6 +201,7 @@ def dis_closure(): else: with trainer.profiler.profile(profiler_name): + print(optimizer) if self._support_closure: optimizer.step(closure=closure, *args, **kwargs) else: diff --git a/tests/core/test_lightning_optimizer.py 
b/tests/core/test_lightning_optimizer.py index 122cb5e541f49..cb4c730c3cc46 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -86,8 +86,8 @@ def configure_optimizers(self): assert trainer.optimizers[0].__repr__() == expected -@patch("torch.optim.Adam.step") -@patch("torch.optim.SGD.step") +@patch("torch.optim.Adam.step", autospec=True) +@patch("torch.optim.SGD.step", autospec=True) def test_lightning_optimizer_manual_optimization(mock_sgd_step, mock_adam_step, tmpdir): """ Test that the user can use our LightningOptimizer. Not recommended for now. @@ -102,13 +102,13 @@ def training_step(self, batch, batch_idx, optimizer_idx=None): output = self.layer(batch) loss_1 = self.loss(batch, output) self.manual_backward(loss_1, opt_1) - opt_1.step(idx="1") + opt_1.step() def closure(): output = self.layer(batch) loss_2 = self.loss(batch, output) self.manual_backward(loss_2, opt_2) - opt_2.step(closure=closure, idx="2") + opt_2.step(closure=closure) def configure_optimizers(self): optimizer_1 = torch.optim.SGD(self.layer.parameters(), lr=0.1) @@ -135,8 +135,8 @@ def configure_optimizers(self): assert len(mock_adam_step.mock_calls) == 8 -@patch("torch.optim.Adam.step") -@patch("torch.optim.SGD.step") +@patch("torch.optim.Adam.step", autospec=True) +@patch("torch.optim.SGD.step", autospec=True) def test_lightning_optimizer_manual_optimization_and_accumulated_gradients(mock_sgd_step, mock_adam_step, tmpdir): """ Test that the user can use our LightningOptimizer. Not recommended. @@ -151,13 +151,13 @@ def training_step(self, batch, batch_idx, optimizer_idx=None): output = self.layer(batch) loss_1 = self.loss(batch, output) self.manual_backward(loss_1, opt_1) - opt_1.step(idx="1") + opt_1.step() def closure(): output = self.layer(batch) loss_2 = self.loss(batch, output) self.manual_backward(loss_2, opt_2) - opt_2.step(closure=closure, idx="2") + opt_2.step(closure=closure) def configure_optimizers(self): optimizer_1 = torch.optim.SGD(self.layer.parameters(), lr=0.1) @@ -195,7 +195,8 @@ def test_state(tmpdir): assert isinstance(lightning_optimizer, Optimizer) lightning_dict = {} special_attrs = ["_accumulate_grad_batches", "_optimizer", "_optimizer_idx", - "_trainer", "_use_accumulate_grad_batches_from_trainer", "_lightning_step"] + "_trainer", "_use_accumulate_grad_batches_from_trainer", "_lightning_step", + "_support_closure"] for k, v in lightning_optimizer.__dict__.items(): if k not in special_attrs: lightning_dict[k] = v diff --git a/tests/trainer/optimization/test_manual_optimization.py b/tests/trainer/optimization/test_manual_optimization.py index 1a904daebefbc..22b2613ab7d66 100644 --- a/tests/trainer/optimization/test_manual_optimization.py +++ b/tests/trainer/optimization/test_manual_optimization.py @@ -761,7 +761,7 @@ def configure_optimizers(self): assert trainer.dev_debugger.count_events('backward_call') == limit_train_batches * 2 -@patch("torch.optim.SGD.step") +@patch("torch.optim.SGD.step", autospec=True) def test_step_with_optimizer_closure_and_extra_arguments(step_mock, tmpdir): """ Tests that `step` works with optimizer_closure and extra arguments @@ -784,7 +784,7 @@ def optimizer_closure(): retain_graph = num_backward != backward_idx # noqa E225 self.manual_backward(loss_1, opt, retain_graph=retain_graph) - opt.step(1, closure=optimizer_closure, something="new") + opt.step(closure=optimizer_closure) def training_epoch_end(self, outputs) -> None: # outputs should be an array with an entry per optimizer @@ -811,11 +811,12 @@ def 
configure_optimizers(self): ) trainer.fit(model) - expected_calls = [call(1, closure=ANY, something="new") for s in range(2)] + # patch(..., autospec=True) adds self + expected_calls = [call(ANY, closure=ANY) for s in range(2)] step_mock.assert_has_calls(expected_calls) -@patch("torch.optim.Adam.step") +@patch("torch.optim.Adam.step", autospec=True) @patch("torch.optim.SGD.step") def test_step_with_optimizer_closure_with_different_frequencies(mock_sgd_step, mock_adam_step, tmpdir): """ @@ -858,7 +859,7 @@ def dis_closure(): if batch_idx % 4 == 0 : # Note: Set make_optimizer_step to True or it will use by default # Trainer(accumulate_grad_batches=x) - opt_dis.step(closure=dis_closure, make_optimizer_step=True, optim='adam') + opt_dis.step(closure=dis_closure, make_optimizer_step=True) def training_epoch_end(self, outputs) -> None: # outputs should be an array with an entry per optimizer @@ -886,8 +887,9 @@ def configure_optimizers(self): ) trainer.fit(model) - expected_calls = [call(closure=ANY, optim='sgd') for s in range(4)] + expected_calls = [call(optim='sgd') for s in range(4)] mock_sgd_step.assert_has_calls(expected_calls) - expected_calls = [call(closure=ANY, optim='adam') for s in range(2)] + # ANY is added because autospec=True adds self + expected_calls = [call(ANY, closure=ANY) for s in range(2)] mock_adam_step.assert_has_calls(expected_calls) From 96adb50e234e6713c15795aed69a97e1de8aa769 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 4 Dec 2020 21:44:24 +0000 Subject: [PATCH 04/17] resolve flake --- tests/trainer/optimization/test_manual_optimization.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/trainer/optimization/test_manual_optimization.py b/tests/trainer/optimization/test_manual_optimization.py index 22b2613ab7d66..ed6e8d90e73d4 100644 --- a/tests/trainer/optimization/test_manual_optimization.py +++ b/tests/trainer/optimization/test_manual_optimization.py @@ -889,7 +889,5 @@ def configure_optimizers(self): trainer.fit(model) expected_calls = [call(optim='sgd') for s in range(4)] mock_sgd_step.assert_has_calls(expected_calls) - - # ANY is added because autospec=True adds self expected_calls = [call(ANY, closure=ANY) for s in range(2)] mock_adam_step.assert_has_calls(expected_calls) From 4cb08eab69cdd239863d5dcee6d3abb43beea8c7 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 4 Dec 2020 21:59:11 +0000 Subject: [PATCH 05/17] update test due to patch limit --- tests/trainer/optimization/test_manual_optimization.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/trainer/optimization/test_manual_optimization.py b/tests/trainer/optimization/test_manual_optimization.py index ed6e8d90e73d4..decdf9454a80f 100644 --- a/tests/trainer/optimization/test_manual_optimization.py +++ b/tests/trainer/optimization/test_manual_optimization.py @@ -761,7 +761,7 @@ def configure_optimizers(self): assert trainer.dev_debugger.count_events('backward_call') == limit_train_batches * 2 -@patch("torch.optim.SGD.step", autospec=True) +@patch("torch.optim.SGD.step") def test_step_with_optimizer_closure_and_extra_arguments(step_mock, tmpdir): """ Tests that `step` works with optimizer_closure and extra arguments @@ -811,12 +811,11 @@ def configure_optimizers(self): ) trainer.fit(model) - # patch(..., autospec=True) adds self - expected_calls = [call(ANY, closure=ANY) for s in range(2)] + expected_calls = [call() for s in range(2)] step_mock.assert_has_calls(expected_calls) -@patch("torch.optim.Adam.step", autospec=True) +@patch("torch.optim.Adam.step") 
@patch("torch.optim.SGD.step") def test_step_with_optimizer_closure_with_different_frequencies(mock_sgd_step, mock_adam_step, tmpdir): """ @@ -889,5 +888,5 @@ def configure_optimizers(self): trainer.fit(model) expected_calls = [call(optim='sgd') for s in range(4)] mock_sgd_step.assert_has_calls(expected_calls) - expected_calls = [call(ANY, closure=ANY) for s in range(2)] + expected_calls = [call() for s in range(2)] mock_adam_step.assert_has_calls(expected_calls) From b5661b45fb21fc4c051b56f1e56d89c8f56804bd Mon Sep 17 00:00:00 2001 From: tchaton Date: Sun, 6 Dec 2020 14:52:05 +0000 Subject: [PATCH 06/17] update --- pytorch_lightning/core/optimizer.py | 1 - pytorch_lightning/utilities/__init__.py | 2 +- requirements/test.txt | 1 + tests/core/test_lightning_optimizer.py | 52 ++----------------------- 4 files changed, 5 insertions(+), 51 deletions(-) diff --git a/pytorch_lightning/core/optimizer.py b/pytorch_lightning/core/optimizer.py index af5c35d79c88a..a75378d634812 100644 --- a/pytorch_lightning/core/optimizer.py +++ b/pytorch_lightning/core/optimizer.py @@ -201,7 +201,6 @@ def dis_closure(): else: with trainer.profiler.profile(profiler_name): - print(optimizer) if self._support_closure: optimizer.step(closure=closure, *args, **kwargs) else: diff --git a/pytorch_lightning/utilities/__init__.py b/pytorch_lightning/utilities/__init__.py index 85cb799500fbd..54587a7dbb803 100644 --- a/pytorch_lightning/utilities/__init__.py +++ b/pytorch_lightning/utilities/__init__.py @@ -48,7 +48,7 @@ def _module_available(module_path: str) -> bool: OMEGACONF_AVAILABLE = _module_available("omegaconf") HYDRA_AVAILABLE = _module_available("hydra") HOROVOD_AVAILABLE = _module_available("horovod.torch") -BOLT_AVAILABLE = _module_available("pl_bolts") +BOLTS_AVAILABLE = _module_available("pl_bolts") TPU_AVAILABLE = XLADeviceUtils.tpu_device_exists() FAIRSCALE_AVAILABLE = platform.system() != 'Windows' and _module_available('fairscale.nn.data_parallel') diff --git a/requirements/test.txt b/requirements/test.txt index 74c62f82c7ff4..b12ffcda838ef 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -16,3 +16,4 @@ pre-commit>=1.0 cloudpickle>=1.3 nltk>=3.3 +pl_bolts>=0.2.5 diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index cb4c730c3cc46..7c4678c6860ef 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -23,10 +23,10 @@ from pytorch_lightning import LightningModule, Trainer from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.core.optimizer import LightningOptimizer -from pytorch_lightning.utilities import BOLT_AVAILABLE +from pytorch_lightning.utilities import BOLTS_AVAILABLE from tests.base.boring_model import BoringModel, RandomDataset, RandomDictDataset, RandomDictStringDataset -if BOLT_AVAILABLE: +if BOLTS_AVAILABLE: from pl_bolts.optimizers.lars_scheduling import LARSWrapper from pl_bolts.optimizers.lr_scheduler import LinearWarmupCosineAnnealingLR @@ -206,48 +206,10 @@ def test_state(tmpdir): @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") -@pytest.mark.skipif(not BOLT_AVAILABLE, reason="Bolt is required for this test") +@pytest.mark.skipif(not BOLTS_AVAILABLE, reason="Bolt is required for this test") def test_lightning_optimizer_state(tmpdir): - class CheckpointEveryNSteps(pl.Callback): - """ - Save a checkpoint every N steps, instead of Lightning's default that checkpoints - based on validation loss. 
- """ - - def __init__( - self, - save_step_frequency, - prefix="latest-Checkpoint", - use_modelcheckpoint_filename=False, - ): - """ - Args: - save_step_frequency: how often to save in steps - prefix: add a prefix to the name, only used if - use_modelcheckpoint_filename=False - use_modelcheckpoint_filename: just use the ModelCheckpoint callback's - default filename, don't use ours. - """ - self.save_step_frequency = save_step_frequency - self.prefix = prefix - self.use_modelcheckpoint_filename = use_modelcheckpoint_filename - - def on_batch_end(self, trainer: pl.Trainer, _): - """ Check if we should save a checkpoint after every train batch """ - global_step = trainer.global_step - if global_step % self.save_step_frequency == 0: - if self.use_modelcheckpoint_filename: - filename = trainer.checkpoint_callback.filename - else: - filename = "{}.ckpt".format(self.prefix) - ckpt_path = os.path.join(trainer.checkpoint_callback.dirpath, filename) - trainer.save_checkpoint(ckpt_path) - class TestModel(BoringModel): - def on_train_epoch_start(self) -> None: - print('override any method to prove your bug') - def configure_optimizers(self): optimizer = torch.optim.Adam(self.parameters(), lr=0.1) @@ -263,12 +225,6 @@ def configure_optimizers(self): val_data = torch.utils.data.DataLoader(RandomDataset(32, 64),batch_size=1) test_data = torch.utils.data.DataLoader(RandomDataset(32, 64),batch_size=1) - checkpoint_callback = ModelCheckpoint( - monitor='loss', - mode='min', - filepath=tmpdir - ) - model = TestModel() trainer = Trainer( default_root_dir=tmpdir, @@ -277,7 +233,5 @@ def configure_optimizers(self): accelerator='ddp_spawn', log_every_n_steps=1, gpus=1, - checkpoint_callback=checkpoint_callback, - callbacks=[CheckpointEveryNSteps(1)] ) trainer.fit(model, train_data, val_data) From d4a8bb5095e8fe10b8551223fcc88dcf87bfc18d Mon Sep 17 00:00:00 2001 From: tchaton Date: Sun, 6 Dec 2020 14:55:11 +0000 Subject: [PATCH 07/17] update dep --- requirements/test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/test.txt b/requirements/test.txt index b12ffcda838ef..9f9b6d899f53c 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -16,4 +16,4 @@ pre-commit>=1.0 cloudpickle>=1.3 nltk>=3.3 -pl_bolts>=0.2.5 +pytorch-lightning-bolts>=0.2.5 From 94131bdd77740c4fa8b8c41ee61246c2aa3b6b96 Mon Sep 17 00:00:00 2001 From: chaton Date: Wed, 9 Dec 2020 15:18:11 +0000 Subject: [PATCH 08/17] Update tests/core/test_lightning_optimizer.py Co-authored-by: Rohit Gupta --- tests/core/test_lightning_optimizer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index 7e678eac559ea..e63007acaef8a 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -238,7 +238,8 @@ def configure_optimizers(self): gpus=1, ) trainer.fit(model, train_data, val_data) -======= + + def test_lightning_optimizer_automatic_optimization(tmpdir): """ Test lightning optimize works with make_optimizer_step in automatic_optimization @@ -451,4 +452,4 @@ def configure_optimizers(self): trainer.fit(model) assert adam_zero_grad.call_count == 20 - assert sgd_zero_grad.call_count == 5 \ No newline at end of file + assert sgd_zero_grad.call_count == 5 From 33cf17889fff42b836ea35e5e7eee4dd067ece8f Mon Sep 17 00:00:00 2001 From: chaton Date: Wed, 9 Dec 2020 15:18:45 +0000 Subject: [PATCH 09/17] Update tests/core/test_lightning_optimizer.py Co-authored-by: Rohit Gupta --- 
tests/core/test_lightning_optimizer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index e63007acaef8a..bff0ed78d5b36 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -207,9 +207,8 @@ def test_state(tmpdir): assert optimizer.state == lightning_optimizer.state - @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") -@pytest.mark.skipif(not BOLTS_AVAILABLE, reason="Bolt is required for this test") +@pytest.mark.skipif(not BOLTS_AVAILABLE, reason="Bolts is required for this test") def test_lightning_optimizer_state(tmpdir): class TestModel(BoringModel): From 842cad6efc54c472bd3553e4115e382692729a39 Mon Sep 17 00:00:00 2001 From: tchaton Date: Wed, 9 Dec 2020 15:56:00 +0000 Subject: [PATCH 10/17] resolve bug --- pytorch_lightning/core/optimizer.py | 10 +++++----- requirements/extra.txt | 3 ++- tests/core/test_lightning_optimizer.py | 26 +++++++++++--------------- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/pytorch_lightning/core/optimizer.py b/pytorch_lightning/core/optimizer.py index 5219a78c9f822..8e8859b5efe3c 100644 --- a/pytorch_lightning/core/optimizer.py +++ b/pytorch_lightning/core/optimizer.py @@ -113,11 +113,11 @@ def __optimizer_step(self, *args, closure: Optional[Callable] = None, profiler_n else: with trainer.profiler.profile(profiler_name): - if self._support_closure: - optimizer.step(closure=closure, *args, **kwargs) - else: - closure() - optimizer.step(*args, **kwargs) + if self._support_closure: + optimizer.step(closure=closure, *args, **kwargs) + else: + closure() + optimizer.step(*args, **kwargs) accelerator_backend = trainer.accelerator_backend if accelerator_backend is not None and accelerator_backend.rpc_enabled: diff --git a/requirements/extra.txt b/requirements/extra.txt index ad54358269bd1..6ec40ee0e23f4 100644 --- a/requirements/extra.txt +++ b/requirements/extra.txt @@ -7,4 +7,5 @@ torchtext>=0.3.1, <0.7 # TODO: temporary fix fix for compatibility onnx>=1.7.0 onnxruntime>=1.3.0 hydra-core>=1.0 -https://github.com/PyTorchLightning/fairscale/archive/pl_1.1.0.zip \ No newline at end of file +https://github.com/PyTorchLightning/fairscale/archive/pl_1.1.0.zip +https://github.com/PyTorchLightning/pytorch-lightning-bolts/archive/0.2.5rc1.zip diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index bff0ed78d5b36..bd2dbbe9a2932 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -24,13 +24,12 @@ from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.core.optimizer import LightningOptimizer from pytorch_lightning.utilities import BOLTS_AVAILABLE +from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.base.boring_model import BoringModel, RandomDataset, RandomDictDataset, RandomDictStringDataset if BOLTS_AVAILABLE: from pl_bolts.optimizers.lars_scheduling import LARSWrapper from pl_bolts.optimizers.lr_scheduler import LinearWarmupCosineAnnealingLR -from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.base.boring_model import BoringModel, RandomDictDataset, RandomDictStringDataset def test_lightning_optimizer(tmpdir): @@ -207,27 +206,24 @@ def test_state(tmpdir): assert optimizer.state == lightning_optimizer.state +class TestLightningOptimizerModel(BoringModel): + + def 
configure_optimizers(self): + optimizer = torch.optim.Adam(self.parameters(), lr=0.1) + + optimizer = LARSWrapper(optimizer) + return [optimizer] + + @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @pytest.mark.skipif(not BOLTS_AVAILABLE, reason="Bolts is required for this test") def test_lightning_optimizer_state(tmpdir): - class TestModel(BoringModel): - - def configure_optimizers(self): - optimizer = torch.optim.Adam(self.parameters(), lr=0.1) - - optimizer = LARSWrapper(optimizer) - scheduler = LinearWarmupCosineAnnealingLR( - optimizer, - warmup_epochs=1, - max_epochs=20 - ) - return [optimizer], [scheduler] train_data = torch.utils.data.DataLoader(RandomDataset(32, 64), batch_size=1) val_data = torch.utils.data.DataLoader(RandomDataset(32, 64),batch_size=1) test_data = torch.utils.data.DataLoader(RandomDataset(32, 64),batch_size=1) - model = TestModel() + model = TestLightningOptimizerModel() trainer = Trainer( default_root_dir=tmpdir, max_epochs=10, From 715b82022660b8445f48d062913bca19f0b138b0 Mon Sep 17 00:00:00 2001 From: tchaton Date: Wed, 9 Dec 2020 16:06:11 +0000 Subject: [PATCH 11/17] update test --- tests/core/test_lightning_module.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/core/test_lightning_module.py b/tests/core/test_lightning_module.py index 0c71259373d1b..17633661a511d 100644 --- a/tests/core/test_lightning_module.py +++ b/tests/core/test_lightning_module.py @@ -56,6 +56,11 @@ def test_automatic_optimization_num_calls(enable_pl_optimizer, tmpdir): class TestModel(BoringModel): + def training_step(self, batch, batch_idx, optimizer_idx): + output = self.layer(batch) + loss = self.loss(batch, output) + return {"loss": loss} + def configure_optimizers(self): optimizer = SGD(self.layer.parameters(), lr=0.1) optimizer_2 = Adam(self.layer.parameters(), lr=0.1) From 13dfeafd8afe10d3f3b0cf9328cf0585903a3e1a Mon Sep 17 00:00:00 2001 From: tchaton Date: Wed, 9 Dec 2020 16:36:06 +0000 Subject: [PATCH 12/17] resolve tests --- pytorch_lightning/core/optimizer.py | 2 -- tests/core/test_lightning_optimizer.py | 5 ++--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/core/optimizer.py b/pytorch_lightning/core/optimizer.py index 8e8859b5efe3c..076ea89ccffce 100644 --- a/pytorch_lightning/core/optimizer.py +++ b/pytorch_lightning/core/optimizer.py @@ -62,7 +62,6 @@ def __init__(self, self._optimizer = optimizer self._accumulate_grad_batches = accumulate_grad_batches self._support_closure = 'closure' in inspect.signature(optimizer.step).parameters.keys() - self._automatic_optimization = None self._optimizer_idx = None @property @@ -75,7 +74,6 @@ def accumulate_grad_batches(self, accumulate_grad_batches): def _on_trainer_init(self, trainer): self._trainer = proxy(trainer) - self._automatic_optimization = trainer.train_loop.automatic_optimization for opt_idx, opt in enumerate(trainer.optimizers): if opt == self._optimizer: self._optimizer_idx = opt_idx diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index bd2dbbe9a2932..ebdcaa970e7b1 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -195,9 +195,8 @@ def test_state(tmpdir): assert isinstance(lightning_optimizer, Adam) assert isinstance(lightning_optimizer, Optimizer) lightning_dict = {} - special_attrs = ["_accumulate_grad_batches", "_optimizer", "_optimizer_idx", - "_trainer", "_use_accumulate_grad_batches_from_trainer", "_lightning_step", - 
"_support_closure", "_accumulate_grad_batches"] + special_attrs = ["_accumulate_grad_batches", "_optimizer", "_optimizer_idx", "_support_closure", + "_trainer"] for k, v in lightning_optimizer.__dict__.items(): if k not in special_attrs: lightning_dict[k] = v From f2d8b72c9803d831cdb5b74994a0bc93f50874fb Mon Sep 17 00:00:00 2001 From: chaton Date: Wed, 9 Dec 2020 19:49:58 +0000 Subject: [PATCH 13/17] Update requirements/extra.txt Co-authored-by: Jirka Borovec --- requirements/extra.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements/extra.txt b/requirements/extra.txt index 6ec40ee0e23f4..3f14b1e5910dd 100644 --- a/requirements/extra.txt +++ b/requirements/extra.txt @@ -8,4 +8,3 @@ onnx>=1.7.0 onnxruntime>=1.3.0 hydra-core>=1.0 https://github.com/PyTorchLightning/fairscale/archive/pl_1.1.0.zip -https://github.com/PyTorchLightning/pytorch-lightning-bolts/archive/0.2.5rc1.zip From 59bf59534f458067cd9c56f6abc93f0feca75616 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 09:30:02 +0100 Subject: [PATCH 14/17] remove bolts dep --- requirements/extra.txt | 1 - requirements/test.txt | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements/extra.txt b/requirements/extra.txt index 6ec40ee0e23f4..3f14b1e5910dd 100644 --- a/requirements/extra.txt +++ b/requirements/extra.txt @@ -8,4 +8,3 @@ onnx>=1.7.0 onnxruntime>=1.3.0 hydra-core>=1.0 https://github.com/PyTorchLightning/fairscale/archive/pl_1.1.0.zip -https://github.com/PyTorchLightning/pytorch-lightning-bolts/archive/0.2.5rc1.zip diff --git a/requirements/test.txt b/requirements/test.txt index d367eb44fe1fa..0b50dbbb687e8 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -17,4 +17,4 @@ pre-commit>=1.0 cloudpickle>=1.3 nltk>=3.3 -pytorch-lightning-bolts>=0.2.5 +pytorch-lightning-bolts From 76a102212c6ddfd7255b23d5a2dec09484627199 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 09:51:16 +0100 Subject: [PATCH 15/17] remove bolts --- pytorch_lightning/core/optimizer.py | 2 +- requirements/test.txt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pytorch_lightning/core/optimizer.py b/pytorch_lightning/core/optimizer.py index 076ea89ccffce..e6b973b336e43 100644 --- a/pytorch_lightning/core/optimizer.py +++ b/pytorch_lightning/core/optimizer.py @@ -61,7 +61,7 @@ def __init__(self, self._trainer = None self._optimizer = optimizer self._accumulate_grad_batches = accumulate_grad_batches - self._support_closure = 'closure' in inspect.signature(optimizer.step).parameters.keys() + self._support_closure = 'closure' in inspect.signature(optimizer.step).parameters self._optimizer_idx = None @property diff --git a/requirements/test.txt b/requirements/test.txt index 0b50dbbb687e8..3cb538a98d7c8 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -17,4 +17,3 @@ pre-commit>=1.0 cloudpickle>=1.3 nltk>=3.3 -pytorch-lightning-bolts From 5b52e856f115c59933b5434f9e608fc299b3b5e9 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 12:38:33 +0100 Subject: [PATCH 16/17] add missing bolts dep for tests --- requirements/test.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements/test.txt b/requirements/test.txt index 3cb538a98d7c8..d367eb44fe1fa 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -17,3 +17,4 @@ pre-commit>=1.0 cloudpickle>=1.3 nltk>=3.3 +pytorch-lightning-bolts>=0.2.5 From 83faead879b81257a69d508e69b78dd4d8ada8d9 Mon Sep 17 00:00:00 2001 From: tchaton Date: Fri, 11 Dec 2020 12:59:24 +0100 Subject: [PATCH 17/17] 
remove need for bolts --- requirements/test.txt | 1 - tests/core/test_lightning_optimizer.py | 55 ++++++++++++++++---------- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/requirements/test.txt b/requirements/test.txt index d367eb44fe1fa..3cb538a98d7c8 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -17,4 +17,3 @@ pre-commit>=1.0 cloudpickle>=1.3 nltk>=3.3 -pytorch-lightning-bolts>=0.2.5 diff --git a/tests/core/test_lightning_optimizer.py b/tests/core/test_lightning_optimizer.py index 34835d177203f..16963a2af3c0d 100644 --- a/tests/core/test_lightning_optimizer.py +++ b/tests/core/test_lightning_optimizer.py @@ -23,14 +23,9 @@ from pytorch_lightning import LightningModule, Trainer from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.core.optimizer import LightningOptimizer -from pytorch_lightning.utilities import BOLTS_AVAILABLE from pytorch_lightning.utilities.exceptions import MisconfigurationException from tests.base.boring_model import BoringModel, RandomDataset, RandomDictDataset, RandomDictStringDataset -if BOLTS_AVAILABLE: - from pl_bolts.optimizers.lars_scheduling import LARSWrapper - from pl_bolts.optimizers.lr_scheduler import LinearWarmupCosineAnnealingLR - def test_lightning_optimizer(tmpdir): """ @@ -212,33 +207,53 @@ def test_state(tmpdir): assert optimizer.state == lightning_optimizer.state -class TestLightningOptimizerModel(BoringModel): +def test_lightning_optimizer_with_wrong_optimizer_interface(tmpdir): + class OptimizerWrapper(object): + def __init__(self, optimizer): + self.optim = optimizer + self.state_dict = self.optim.state_dict + self.load_state_dict = self.optim.load_state_dict + self.zero_grad = self.optim.zero_grad + self.add_param_group = self.optim.add_param_group + self.__setstate__ = self.optim.__setstate__ + self.__getstate__ = self.optim.__getstate__ + self.__repr__ = self.optim.__repr__ - def configure_optimizers(self): - optimizer = torch.optim.Adam(self.parameters(), lr=0.1) + @property + def __class__(self): + return Optimizer - optimizer = LARSWrapper(optimizer) - return [optimizer] + @property + def state(self): + return self.optim.state + + @property + def param_groups(self): + return self.optim.param_groups + @param_groups.setter + def param_groups(self, value): + self.optim.param_groups = value -@pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") -@pytest.mark.skipif(not BOLTS_AVAILABLE, reason="Bolts is required for this test") -def test_lightning_optimizer_state(tmpdir): + def step(self): + # wrongly defined step. Should contain closure + self.optim.step(closure=None) - train_data = torch.utils.data.DataLoader(RandomDataset(32, 64), batch_size=1) - val_data = torch.utils.data.DataLoader(RandomDataset(32, 64),batch_size=1) - test_data = torch.utils.data.DataLoader(RandomDataset(32, 64),batch_size=1) + class TestLightningOptimizerModel(BoringModel): + + def configure_optimizers(self): + optimizer = torch.optim.Adam(self.parameters(), lr=0.1) + optimizer = OptimizerWrapper(optimizer) + return [optimizer] model = TestLightningOptimizerModel() trainer = Trainer( default_root_dir=tmpdir, - max_epochs=10, + max_epochs=1, weights_summary=None, - accelerator='ddp_spawn', log_every_n_steps=1, - gpus=1, ) - trainer.fit(model, train_data, val_data) + trainer.fit(model) def test_lightning_optimizer_automatic_optimization(tmpdir):
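
The change these patches repeat in `LightningModule.optimizer_step` and `LightningOptimizer.__optimizer_step` is a signature check: pass `closure=` to `optimizer.step` only when the wrapped optimizer's `step` actually accepts it, and otherwise evaluate the closure manually before calling a plain `step()`. Below is a minimal standalone sketch of that pattern; the `step_with_optional_closure` helper and the `NoClosureOptimizer` class are illustrative assumptions for this note, not code from the PR.

```python
import inspect

import torch
from torch.optim import SGD


def step_with_optional_closure(optimizer, closure, *args, **kwargs):
    """Call optimizer.step with a closure only if its signature supports one."""
    supports_closure = 'closure' in inspect.signature(optimizer.step).parameters
    if supports_closure:
        # Native torch.optim optimizers accept an optional closure argument.
        return optimizer.step(closure=closure, *args, **kwargs)
    # Third-party wrappers (e.g. the LARS wrapper exercised in the tests) may
    # define step() without a closure parameter, so run the closure
    # (forward + backward) manually, then step.
    closure()
    return optimizer.step(*args, **kwargs)


class NoClosureOptimizer:
    """Illustrative wrapper whose step() takes no closure argument."""

    def __init__(self, optimizer):
        self.optimizer = optimizer

    def step(self):
        self.optimizer.step()


if __name__ == "__main__":
    param = torch.nn.Parameter(torch.randn(2, 2))

    def closure():
        param.pow(2).sum().backward()

    # Closure is forwarded to SGD.step, which supports it.
    step_with_optional_closure(SGD([param], lr=0.1), closure)
    # Closure is executed manually before stepping the wrapper.
    step_with_optional_closure(NoClosureOptimizer(SGD([param], lr=0.1)), closure)
```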