From 44d23bca73088597543cbc6d055b20700c117b44 Mon Sep 17 00:00:00 2001
From: NicEggert
Date: Tue, 22 Oct 2019 13:27:31 -0500
Subject: [PATCH 1/9] Make name and version properties required

---
 pytorch_lightning/logging/base.py             |  7 ++++++-
 pytorch_lightning/logging/mlflow_logger.py    |  8 ++++++++
 pytorch_lightning/logging/test_tube_logger.py | 11 +++++++++--
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/pytorch_lightning/logging/base.py b/pytorch_lightning/logging/base.py
index 351b945e856f7..3047a8d3a13cd 100644
--- a/pytorch_lightning/logging/base.py
+++ b/pytorch_lightning/logging/base.py
@@ -64,8 +64,13 @@ def rank(self):
     def rank(self, value):
         """Set the process rank"""
         self._rank = value
+
+    @property
+    def name(self):
+        """Return the experiment name"""
+        raise NotImplementedError("Sub-classes must provide a name property")
 
     @property
     def version(self):
         """Return the experiment version"""
-        return None
+        raise NotImplementedError("Sub-classes must provide a version property")
diff --git a/pytorch_lightning/logging/mlflow_logger.py b/pytorch_lightning/logging/mlflow_logger.py
index c38b7a1695ea5..a1527f630c66b 100644
--- a/pytorch_lightning/logging/mlflow_logger.py
+++ b/pytorch_lightning/logging/mlflow_logger.py
@@ -57,3 +57,11 @@ def finalize(self, status="FINISHED"):
         if status == 'success':
             status = 'FINISHED'
         self.experiment.set_terminated(self.run_id, status)
+
+    @property
+    def name(self):
+        return self.experiment_name
+
+    @property
+    def version(self):
+        return self._run_id
diff --git a/pytorch_lightning/logging/test_tube_logger.py b/pytorch_lightning/logging/test_tube_logger.py
index e8e9607f5b20b..31ab108b7baf6 100644
--- a/pytorch_lightning/logging/test_tube_logger.py
+++ b/pytorch_lightning/logging/test_tube_logger.py
@@ -12,7 +12,7 @@ def __init__(
     ):
         super().__init__()
         self.save_dir = save_dir
-        self.name = name
+        self._name = name
         self.description = description
         self.debug = debug
         self._version = version
@@ -26,7 +26,7 @@ def experiment(self):
 
         self._experiment = Experiment(
             save_dir=self.save_dir,
-            name=self.name,
+            name=self._name,
             debug=self.debug,
             version=self.version,
             description=self.description,
@@ -76,6 +76,13 @@ def rank(self, value):
         self._rank = value
         if self._experiment is not None:
             self.experiment.rank = value
+
+    @property
+    def name(self):
+        if self._experiment is None:
+            return self._name
+        else:
+            return self.experiment.name
 
     @property
     def version(self):

From 70f2fbbc1b5a42e08a8f6cddb9262111c1ff7889 Mon Sep 17 00:00:00 2001
From: NicEggert
Date: Tue, 22 Oct 2019 13:29:12 -0500
Subject: [PATCH 2/9] Warn before deleting files in checkpoint directory

---
 pytorch_lightning/callbacks/pt_callbacks.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py
index d5d80d2445191..2663fd44ba9f2 100644
--- a/pytorch_lightning/callbacks/pt_callbacks.py
+++ b/pytorch_lightning/callbacks/pt_callbacks.py
@@ -1,5 +1,6 @@
 import os
 import shutil
+import warnings
 
 import numpy as np
 
@@ -177,6 +178,17 @@ def __init__(self, filepath, monitor='val_loss', verbose=0,
                  save_best_only=True, save_weights_only=False,
                  mode='auto', period=1, prefix=''):
         super(ModelCheckpoint, self).__init__()
+        if (
+            save_best_only and
+            os.path.exists(filepath) and
+            os.path.isdir(filepath) and
+            len(os.listdir(filepath)) > 0
+        ):
+            warnings.warn(
+                f"Checkpoint directory {filepath} exists and is not empty with save_best_only=True. "
+                "All files in this directory will be deleted when a checkpoint is saved!"
+            )
+
         self.monitor = monitor
         self.verbose = verbose
         self.filepath = filepath
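Note on PATCH 1/9 (illustrative, not part of the patches): with name and
version now abstract on LightningLoggerBase, any custom logger must implement
both properties or it will raise NotImplementedError. A minimal sketch of a
conforming subclass, modeled on the CustomLogger used in the tests later in
this series; the class name and property values here are invented for
illustration:

    from pytorch_lightning.logging import LightningLoggerBase, rank_zero_only


    class InMemoryLogger(LightningLoggerBase):
        """Illustrative logger; records everything in plain Python objects."""

        def __init__(self):
            super().__init__()
            self.hparams_logged = None
            self.metrics_logged = []

        @rank_zero_only
        def log_hyperparams(self, params):
            self.hparams_logged = params

        @rank_zero_only
        def log_metrics(self, metrics, step_num):
            self.metrics_logged.append((step_num, metrics))

        @property
        def name(self):
            # Required as of PATCH 1/9; the base class now raises
            # NotImplementedError when this is missing.
            return "in_memory"

        @property
        def version(self):
            # Also required; used to build the default checkpoint path
            # (see PATCH 3/9).
            return "0"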
+ "All files in this directory will be deleted when a checkpoint is saved!" + ) + self.monitor = monitor self.verbose = verbose self.filepath = filepath From d8db6e579b5533251755f99a49dbe9340d740060 Mon Sep 17 00:00:00 2001 From: NicEggert Date: Tue, 22 Oct 2019 13:27:55 -0500 Subject: [PATCH 3/9] Get default checkpoint path from any logger --- .../trainer/callback_config_mixin.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/trainer/callback_config_mixin.py b/pytorch_lightning/trainer/callback_config_mixin.py index 1b36bd469156b..7e42020d6b34b 100644 --- a/pytorch_lightning/trainer/callback_config_mixin.py +++ b/pytorch_lightning/trainer/callback_config_mixin.py @@ -1,3 +1,5 @@ +import os + from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping from pytorch_lightning.logging import TestTubeLogger @@ -12,14 +14,15 @@ def configure_checkpoint_callback(self): """ if self.checkpoint_callback is True: # init a default one - if isinstance(self.logger, TestTubeLogger): - ckpt_path = '{}/{}/version_{}/{}'.format( + if self.logger is not None: + ckpt_path = os.path.join([ self.default_save_path, - self.logger.experiment.name, - self.logger.experiment.version, - 'checkpoints') + self.logger.name, + self.loggger.versino, + "checkpoints" + ]) else: - ckpt_path = self.default_save_path + ckpt_path = "checkpoints" self.checkpoint_callback = ModelCheckpoint( filepath=ckpt_path From 5a3a88aa9e103a3bd89aad59b84b78da4e6735bb Mon Sep 17 00:00:00 2001 From: NicEggert Date: Tue, 22 Oct 2019 13:54:46 -0500 Subject: [PATCH 4/9] Fix typos --- pytorch_lightning/trainer/callback_config_mixin.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/trainer/callback_config_mixin.py b/pytorch_lightning/trainer/callback_config_mixin.py index 7e42020d6b34b..a999dcb20c94f 100644 --- a/pytorch_lightning/trainer/callback_config_mixin.py +++ b/pytorch_lightning/trainer/callback_config_mixin.py @@ -15,12 +15,12 @@ def configure_checkpoint_callback(self): if self.checkpoint_callback is True: # init a default one if self.logger is not None: - ckpt_path = os.path.join([ + ckpt_path = os.path.join( self.default_save_path, self.logger.name, - self.loggger.versino, + str(self.logger.version), "checkpoints" - ]) + ) else: ckpt_path = "checkpoints" From 8d87a6517e4d007033a6da07009c08e3b349b348 Mon Sep 17 00:00:00 2001 From: NicEggert Date: Tue, 22 Oct 2019 13:55:15 -0500 Subject: [PATCH 5/9] Uncomment logger tests --- tests/test_logging.py | 226 +++++++++++++++++++++--------------------- 1 file changed, 115 insertions(+), 111 deletions(-) diff --git a/tests/test_logging.py b/tests/test_logging.py index 9db657501854f..ed3b077b5b37e 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -1,3 +1,4 @@ +import os import pickle import numpy as np @@ -5,6 +6,7 @@ from pytorch_lightning import Trainer from pytorch_lightning.testing import LightningTestModel +from pytorch_lightning.logging import LightningLoggerBase, rank_zero_only from .test_models import get_hparams, get_test_tube_logger, init_save_dir, clear_save_dir RANDOM_FILE_PATHS = list(np.random.randint(12000, 19000, 1000)) @@ -69,117 +71,119 @@ def test_testtube_pickle(): clear_save_dir() -# def test_mlflow_logger(): -# """ -# verify that basic functionality of mlflow logger works -# """ -# reset_seed() -# -# try: -# from pytorch_lightning.logging import MLFlowLogger -# except ModuleNotFoundError: -# return -# -# hparams = get_hparams() -# model = 
From 8d87a6517e4d007033a6da07009c08e3b349b348 Mon Sep 17 00:00:00 2001
From: NicEggert
Date: Tue, 22 Oct 2019 13:55:15 -0500
Subject: [PATCH 5/9] Uncomment logger tests

---
 tests/test_logging.py | 226 +++++++++++++++++++++---------------------
 1 file changed, 115 insertions(+), 111 deletions(-)

diff --git a/tests/test_logging.py b/tests/test_logging.py
index 9db657501854f..ed3b077b5b37e 100644
--- a/tests/test_logging.py
+++ b/tests/test_logging.py
@@ -1,3 +1,4 @@
+import os
 import pickle
 
 import numpy as np
@@ -5,6 +6,7 @@
 
 from pytorch_lightning import Trainer
 from pytorch_lightning.testing import LightningTestModel
+from pytorch_lightning.logging import LightningLoggerBase, rank_zero_only
 from .test_models import get_hparams, get_test_tube_logger, init_save_dir, clear_save_dir
 
 RANDOM_FILE_PATHS = list(np.random.randint(12000, 19000, 1000))
@@ -69,117 +71,119 @@ def test_testtube_pickle():
     clear_save_dir()
 
 
-# def test_mlflow_logger():
-#     """
-#     verify that basic functionality of mlflow logger works
-#     """
-#     reset_seed()
-#
-#     try:
-#         from pytorch_lightning.logging import MLFlowLogger
-#     except ModuleNotFoundError:
-#         return
-#
-#     hparams = get_hparams()
-#     model = LightningTestModel(hparams)
-#
-#     root_dir = os.path.dirname(os.path.realpath(__file__))
-#     mlflow_dir = os.path.join(root_dir, "mlruns")
-#     import pdb
-#     pdb.set_trace()
-#
-#     logger = MLFlowLogger("test", f"file://{mlflow_dir}")
-#     logger.log_hyperparams(hparams)
-#     logger.save()
-#
-#     trainer_options = dict(
-#         max_nb_epochs=1,
-#         train_percent_check=0.01,
-#         logger=logger
-#     )
-#
-#     trainer = Trainer(**trainer_options)
-#     result = trainer.fit(model)
-#
-#     print('result finished')
-#     assert result == 1, "Training failed"
-#
-#     shutil.move(mlflow_dir, mlflow_dir + f'_{n}')
-
-
-# def test_mlflow_pickle():
-#     """
-#     verify that pickling trainer with mlflow logger works
-#     """
-#     reset_seed()
-#
-#     try:
-#         from pytorch_lightning.logging import MLFlowLogger
-#     except ModuleNotFoundError:
-#         return
-#
-#     hparams = get_hparams()
-#     model = LightningTestModel(hparams)
-#
-#     root_dir = os.path.dirname(os.path.realpath(__file__))
-#     mlflow_dir = os.path.join(root_dir, "mlruns")
-#
-#     logger = MLFlowLogger("test", f"file://{mlflow_dir}")
-#     logger.log_hyperparams(hparams)
-#     logger.save()
-#
-#     trainer_options = dict(
-#         max_nb_epochs=1,
-#         logger=logger
-#     )
-#
-#     trainer = Trainer(**trainer_options)
-#     pkl_bytes = pickle.dumps(trainer)
-#     trainer2 = pickle.loads(pkl_bytes)
-#     trainer2.logger.log_metrics({"acc": 1.0})
-#
-#     n = RANDOM_FILE_PATHS.pop()
-#     shutil.move(mlflow_dir, mlflow_dir + f'_{n}')
-
-
-# def test_custom_logger():
-#
-#     class CustomLogger(LightningLoggerBase):
-#         def __init__(self):
-#             super().__init__()
-#             self.hparams_logged = None
-#             self.metrics_logged = None
-#             self.finalized = False
-#
-#         @rank_zero_only
-#         def log_hyperparams(self, params):
-#             self.hparams_logged = params
-#
-#         @rank_zero_only
-#         def log_metrics(self, metrics, step_num):
-#             self.metrics_logged = metrics
-#
-#         @rank_zero_only
-#         def finalize(self, status):
-#             self.finalized_status = status
-#
-#     hparams = get_hparams()
-#     model = LightningTestModel(hparams)
-#
-#     logger = CustomLogger()
-#
-#     trainer_options = dict(
-#         max_nb_epochs=1,
-#         train_percent_check=0.01,
-#         logger=logger
-#     )
-#
-#     trainer = Trainer(**trainer_options)
-#     result = trainer.fit(model)
-#     assert result == 1, "Training failed"
-#     assert logger.hparams_logged == hparams
-#     assert logger.metrics_logged != {}
-#     assert logger.finalized_status == "success"
+def test_mlflow_logger():
+    """
+    verify that basic functionality of mlflow logger works
+    """
+    reset_seed()
+
+    try:
+        from pytorch_lightning.logging import MLFlowLogger
+    except ModuleNotFoundError:
+        return
+
+    hparams = get_hparams()
+    model = LightningTestModel(hparams)
+
+    root_dir = os.path.dirname(os.path.realpath(__file__))
+    mlflow_dir = os.path.join(root_dir, "mlruns")
+
+    logger = MLFlowLogger("test", f"file://{mlflow_dir}")
+
+    trainer_options = dict(
+        max_nb_epochs=1,
+        train_percent_check=0.01,
+        logger=logger
+    )
+
+    trainer = Trainer(**trainer_options)
+    result = trainer.fit(model)
+
+    print('result finished')
+    assert result == 1, "Training failed"
+
+    clear_save_dir()
+
+
+def test_mlflow_pickle():
+    """
+    verify that pickling trainer with mlflow logger works
+    """
+    reset_seed()
+
+    try:
+        from pytorch_lightning.logging import MLFlowLogger
+    except ModuleNotFoundError:
+        return
+
+    hparams = get_hparams()
+    model = LightningTestModel(hparams)
+
+    root_dir = os.path.dirname(os.path.realpath(__file__))
+    mlflow_dir = os.path.join(root_dir, "mlruns")
+
+    logger = MLFlowLogger("test", f"file://{mlflow_dir}")
+
+    trainer_options = dict(
+        max_nb_epochs=1,
+        logger=logger
+    )
+
+    trainer = Trainer(**trainer_options)
+    pkl_bytes = pickle.dumps(trainer)
+    trainer2 = pickle.loads(pkl_bytes)
+    trainer2.logger.log_metrics({"acc": 1.0})
+
+    clear_save_dir()
+
+
+def test_custom_logger(tmpdir):
+
+    class CustomLogger(LightningLoggerBase):
+        def __init__(self):
+            super().__init__()
+            self.hparams_logged = None
+            self.metrics_logged = None
+            self.finalized = False
+
+        @rank_zero_only
+        def log_hyperparams(self, params):
+            self.hparams_logged = params
+
+        @rank_zero_only
+        def log_metrics(self, metrics, step_num):
+            self.metrics_logged = metrics
+
+        @rank_zero_only
+        def finalize(self, status):
+            self.finalized_status = status
+
+        @property
+        def name(self):
+            return "name"
+
+        @property
+        def version(self):
+            return "1"
+
+    hparams = get_hparams()
+    model = LightningTestModel(hparams)
+
+    logger = CustomLogger()
+
+    trainer_options = dict(
+        max_nb_epochs=1,
+        train_percent_check=0.01,
+        logger=logger,
+        default_save_path=tmpdir
+    )
+
+    trainer = Trainer(**trainer_options)
+    result = trainer.fit(model)
+    assert result == 1, "Training failed"
+    assert logger.hparams_logged == hparams
+    assert logger.metrics_logged != {}
+    assert logger.finalized_status == "success"
 
 
 def reset_seed():
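Note on PATCH 5/9 (illustrative, not part of the patches): the uncommented
tests exercise MLFlowLogger against a local file store. A condensed usage
sketch of the same pattern; the store location is invented for illustration,
and model construction is omitted:

    import os

    from pytorch_lightning import Trainer
    from pytorch_lightning.logging import MLFlowLogger

    # Point MLflow at a local file store, exactly as the tests do.
    mlflow_dir = os.path.join(os.getcwd(), "mlruns")  # illustrative location
    logger = MLFlowLogger("test", f"file://{mlflow_dir}")

    trainer = Trainer(
        max_nb_epochs=1,
        train_percent_check=0.01,
        logger=logger,
    )
    # trainer.fit(model)  # model construction omitted; see LightningTestModel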
From b669e3bb8712152d2d2724f954ab4db65de0d72c Mon Sep 17 00:00:00 2001
From: NicEggert
Date: Tue, 22 Oct 2019 13:59:18 -0500
Subject: [PATCH 6/9] Whitespace

---
 pytorch_lightning/logging/base.py             | 2 +-
 pytorch_lightning/logging/mlflow_logger.py    | 4 ++--
 pytorch_lightning/logging/test_tube_logger.py | 2 +-
 tests/test_logging.py                         | 6 +++---
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/pytorch_lightning/logging/base.py b/pytorch_lightning/logging/base.py
index 3047a8d3a13cd..a72e072be6cc9 100644
--- a/pytorch_lightning/logging/base.py
+++ b/pytorch_lightning/logging/base.py
@@ -64,7 +64,7 @@ def rank(self):
     def rank(self, value):
         """Set the process rank"""
         self._rank = value
-
+
     @property
     def name(self):
         """Return the experiment name"""
diff --git a/pytorch_lightning/logging/mlflow_logger.py b/pytorch_lightning/logging/mlflow_logger.py
index a1527f630c66b..689a839623c11 100644
--- a/pytorch_lightning/logging/mlflow_logger.py
+++ b/pytorch_lightning/logging/mlflow_logger.py
@@ -57,11 +57,11 @@ def finalize(self, status="FINISHED"):
         if status == 'success':
             status = 'FINISHED'
         self.experiment.set_terminated(self.run_id, status)
-
+
     @property
     def name(self):
         return self.experiment_name
-
+
     @property
     def version(self):
         return self._run_id
diff --git a/pytorch_lightning/logging/test_tube_logger.py b/pytorch_lightning/logging/test_tube_logger.py
index 31ab108b7baf6..874f994eb573a 100644
--- a/pytorch_lightning/logging/test_tube_logger.py
+++ b/pytorch_lightning/logging/test_tube_logger.py
@@ -76,7 +76,7 @@ def rank(self, value):
         self._rank = value
         if self._experiment is not None:
             self.experiment.rank = value
-
+
     @property
     def name(self):
         if self._experiment is None:
diff --git a/tests/test_logging.py b/tests/test_logging.py
index ed3b077b5b37e..ac35cf5699bfb 100644
--- a/tests/test_logging.py
+++ b/tests/test_logging.py
@@ -157,15 +157,15 @@ def log_metrics(self, metrics, step_num):
         @rank_zero_only
         def finalize(self, status):
             self.finalized_status = status
-
+
         @property
         def name(self):
             return "name"
-
+
         @property
         def version(self):
             return "1"
-
+
     hparams = get_hparams()
     model = LightningTestModel(hparams)
 

From 658bdf75738b3f8925a3869d02fec083d473513f Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Wed, 23 Oct 2019 04:44:22 -0400
Subject: [PATCH 7/9] Update callback_config_mixin.py

Checkpoint and version file names would otherwise just have a number.
It's easier to tell what you're looking at with version_ prepended.
---
 pytorch_lightning/trainer/callback_config_mixin.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pytorch_lightning/trainer/callback_config_mixin.py b/pytorch_lightning/trainer/callback_config_mixin.py
index a999dcb20c94f..51ff85d1a2ab7 100644
--- a/pytorch_lightning/trainer/callback_config_mixin.py
+++ b/pytorch_lightning/trainer/callback_config_mixin.py
@@ -18,7 +18,7 @@ def configure_checkpoint_callback(self):
                 ckpt_path = os.path.join(
                     self.default_save_path,
                     self.logger.name,
-                    str(self.logger.version),
+                    f'version_{self.logger.version}',
                     "checkpoints"
                 )
             else:
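Note on PATCH 7/9 (illustrative, not part of the patches): the change only
affects the version segment of the default checkpoint path. A sketch with
invented save path, experiment name, and version:

    import os

    # Before PATCH 7/9: /tmp/lightning/my_experiment/3/checkpoints
    # After PATCH 7/9:  /tmp/lightning/my_experiment/version_3/checkpoints
    version = 3
    ckpt_path = os.path.join(
        "/tmp/lightning", "my_experiment", f"version_{version}", "checkpoints"
    )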
From f4c3985395e4c90ee8bc9d3b8221db9ecc646e24 Mon Sep 17 00:00:00 2001
From: NicEggert
Date: Tue, 29 Oct 2019 16:26:53 -0500
Subject: [PATCH 8/9] Address comments

---
 pytorch_lightning/callbacks/pt_callbacks.py        | 1 -
 pytorch_lightning/trainer/callback_config_mixin.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py
index 2663fd44ba9f2..76aeba2b778ab 100644
--- a/pytorch_lightning/callbacks/pt_callbacks.py
+++ b/pytorch_lightning/callbacks/pt_callbacks.py
@@ -180,7 +180,6 @@ def __init__(self, filepath, monitor='val_loss', verbose=0,
         super(ModelCheckpoint, self).__init__()
         if (
             save_best_only and
-            os.path.exists(filepath) and
             os.path.isdir(filepath) and
             len(os.listdir(filepath)) > 0
         ):
diff --git a/pytorch_lightning/trainer/callback_config_mixin.py b/pytorch_lightning/trainer/callback_config_mixin.py
index 51ff85d1a2ab7..58e6ec05c011e 100644
--- a/pytorch_lightning/trainer/callback_config_mixin.py
+++ b/pytorch_lightning/trainer/callback_config_mixin.py
@@ -22,7 +22,7 @@ def configure_checkpoint_callback(self):
                     "checkpoints"
                 )
             else:
-                ckpt_path = "checkpoints"
+                ckpt_path = os.path.join(self.default_save_path, "checkpoints")
 
         self.checkpoint_callback = ModelCheckpoint(
             filepath=ckpt_path

From 13777e11bd3529baadf39f7b54be746450eb0c4e Mon Sep 17 00:00:00 2001
From: NicEggert
Date: Tue, 29 Oct 2019 16:36:31 -0500
Subject: [PATCH 9/9] Fix broken tests

---
 tests/test_y_logging.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/test_y_logging.py b/tests/test_y_logging.py
index 3dcfbacc635ac..c44a9e2c8b987 100644
--- a/tests/test_y_logging.py
+++ b/tests/test_y_logging.py
@@ -82,7 +82,7 @@ def test_mlflow_logger():
     except ModuleNotFoundError:
         return
 
-    hparams = get_hparams()
+    hparams = testing_utils.get_hparams()
     model = LightningTestModel(hparams)
 
     root_dir = os.path.dirname(os.path.realpath(__file__))
@@ -102,7 +102,7 @@ def test_mlflow_logger():
     print('result finished')
     assert result == 1, "Training failed"
 
-    clear_save_dir()
+    testing_utils.clear_save_dir()
 
 
 def test_mlflow_pickle():
@@ -116,7 +116,7 @@ def test_mlflow_pickle():
     except ModuleNotFoundError:
         return
 
-    hparams = get_hparams()
+    hparams = testing_utils.get_hparams()
     model = LightningTestModel(hparams)
 
     root_dir = os.path.dirname(os.path.realpath(__file__))
@@ -134,7 +134,7 @@ def test_mlflow_pickle():
     trainer2 = pickle.loads(pkl_bytes)
     trainer2.logger.log_metrics({"acc": 1.0})
 
-    clear_save_dir()
+    testing_utils.clear_save_dir()
 
 
 def test_custom_logger(tmpdir):
@@ -166,7 +166,7 @@ def name(self):
     def version(self):
         return "1"
 
-    hparams = get_hparams()
+    hparams = testing_utils.get_hparams()
     model = LightningTestModel(hparams)
 
     logger = CustomLogger()
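Note on PATCH 8/9 (illustrative, not part of the patches): os.path.isdir
already returns False for a nonexistent path, so the separate os.path.exists
check was redundant. A sketch of the resulting guard, with an invented
directory name:

    import os
    import warnings

    filepath = "checkpoints"  # illustrative
    save_best_only = True

    # isdir() is False when the path does not exist, so no exists() needed.
    if save_best_only and os.path.isdir(filepath) and len(os.listdir(filepath)) > 0:
        warnings.warn(
            f"Checkpoint directory {filepath} exists and is not empty with save_best_only=True. "
            "All files in this directory will be deleted when a checkpoint is saved!"
        )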