From 0a608063c458b4a5bc9d8979eefcce233ba3d70e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 18 Oct 2019 15:57:28 +0200 Subject: [PATCH 01/15] changes to seed for tests --- tests/test_models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_models.py b/tests/test_models.py index 55259acb7d7fe..249769998f041 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -49,6 +49,8 @@ def test_early_stopping_cpu_model(): :return: """ reset_seed() + import pdb + pdb.set_trace() stopping = EarlyStopping(monitor='val_loss') trainer_options = dict( From 18d28cd242a4b78e26ca047165ce94cf91705a96 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 18 Oct 2019 16:26:17 +0200 Subject: [PATCH 02/15] changes to seed for tests --- tests/test_logging.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_logging.py b/tests/test_logging.py index f9a5eac61723a..5ca59ff553a1f 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -17,13 +17,13 @@ np.random.seed(ROOT_SEED) RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000)) - +import pdb def test_testtube_logger(): """ verify that basic functionality of test tube logger works """ reset_seed() - + pdb.set_trace() hparams = get_hparams() model = LightningTestModel(hparams) @@ -43,6 +43,7 @@ def test_testtube_logger(): assert result == 1, "Training failed" clear_save_dir() + pdb.set_trace() def test_testtube_pickle(): @@ -72,7 +73,7 @@ def test_testtube_pickle(): trainer2.logger.log_metrics({"acc": 1.0}) clear_save_dir() - + pdb.set_trace() def test_mlflow_logger(): """ From 3a2872d98cbb6ff7896a4be624bed3e381f8619e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 18 Oct 2019 16:29:08 +0200 Subject: [PATCH 03/15] changes to seed for tests --- tests/test_logging.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_logging.py b/tests/test_logging.py index 5ca59ff553a1f..838900640e17d 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -17,13 +17,12 @@ np.random.seed(ROOT_SEED) RANDOM_SEEDS = list(np.random.randint(0, 10000, 1000)) -import pdb + def test_testtube_logger(): """ verify that basic functionality of test tube logger works """ reset_seed() - pdb.set_trace() hparams = get_hparams() model = LightningTestModel(hparams) @@ -43,7 +42,6 @@ def test_testtube_logger(): assert result == 1, "Training failed" clear_save_dir() - pdb.set_trace() def test_testtube_pickle(): @@ -73,7 +71,7 @@ def test_testtube_pickle(): trainer2.logger.log_metrics({"acc": 1.0}) clear_save_dir() - pdb.set_trace() + def test_mlflow_logger(): """ @@ -91,6 +89,8 @@ def test_mlflow_logger(): root_dir = os.path.dirname(os.path.realpath(__file__)) mlflow_dir = os.path.join(root_dir, "mlruns") + import pdb + pdb.set_trace() logger = MLFlowLogger("test", f"file://{mlflow_dir}") logger.log_hyperparams(hparams) @@ -108,6 +108,7 @@ def test_mlflow_logger(): assert result == 1, "Training failed" n = RANDOM_FILE_PATHS.pop() + pdb.set_trace() shutil.move(mlflow_dir, mlflow_dir + f'_{n}') From 50c5abb956392dd5639bbfa34277f47d93c6e7f4 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 18 Oct 2019 22:10:55 +0200 Subject: [PATCH 04/15] changes to seed for tests --- tests/test_logging.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_logging.py b/tests/test_logging.py index 838900640e17d..7a3eaea0c2e51 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -105,6 +105,8 @@ def test_mlflow_logger(): trainer = Trainer(**trainer_options) result = trainer.fit(model) + print('result finished') + pdb.set_trace() assert result == 1, "Training failed" n = RANDOM_FILE_PATHS.pop() From 39521cb1efef399d9d32f99a659cf7d28d4cf404 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 18 Oct 2019 22:12:40 +0200 Subject: [PATCH 05/15] changes to seed for tests --- pytorch_lightning/trainer/trainer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 3b4fe2c5a3e40..ab75f298f8b7a 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -779,6 +779,8 @@ def fit(self, model): self.optimizers, self.lr_schedulers = self.init_optimizers(model.configure_optimizers()) self.__run_pretrain_routine(model) + print('pretrain done') + pdb.set_trace() # return 1 when finished # used for testing or when we need to know that training succeeded From c1c91b502ba78b3216a50c11a4e5d8b174374840 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 18 Oct 2019 22:17:35 +0200 Subject: [PATCH 06/15] changes to seed for tests --- tests/test_logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_logging.py b/tests/test_logging.py index 7a3eaea0c2e51..461f6da64713d 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -104,9 +104,9 @@ def test_mlflow_logger(): trainer = Trainer(**trainer_options) result = trainer.fit(model) + pdb.set_trace() print('result finished') - pdb.set_trace() assert result == 1, "Training failed" n = RANDOM_FILE_PATHS.pop() From 2cfbb590ae0b937abe33fdba91d050cec84cbc38 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 18 Oct 2019 22:20:58 +0200 Subject: [PATCH 07/15] changes to seed for tests --- pytorch_lightning/trainer/trainer.py | 2 - tests/test_logging.py | 145 +++++++++++++-------------- 2 files changed, 71 insertions(+), 76 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index ab75f298f8b7a..3b4fe2c5a3e40 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -779,8 +779,6 @@ def fit(self, model): self.optimizers, self.lr_schedulers = self.init_optimizers(model.configure_optimizers()) self.__run_pretrain_routine(model) - print('pretrain done') - pdb.set_trace() # return 1 when finished # used for testing or when we need to know that training succeeded diff --git a/tests/test_logging.py b/tests/test_logging.py index 461f6da64713d..7460dcfd2c9aa 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -73,80 +73,77 @@ def test_testtube_pickle(): clear_save_dir() -def test_mlflow_logger(): - """ - verify that basic functionality of mlflow logger works - """ - reset_seed() - - try: - from pytorch_lightning.logging import MLFlowLogger - except ModuleNotFoundError: - return - - hparams = get_hparams() - model = LightningTestModel(hparams) - - root_dir = os.path.dirname(os.path.realpath(__file__)) - mlflow_dir = os.path.join(root_dir, "mlruns") - import pdb - pdb.set_trace() - - logger = MLFlowLogger("test", f"file://{mlflow_dir}") - logger.log_hyperparams(hparams) - logger.save() - - trainer_options = dict( - max_nb_epochs=1, - train_percent_check=0.01, - logger=logger - ) - - trainer = Trainer(**trainer_options) - result = trainer.fit(model) - pdb.set_trace() - - print('result finished') - assert result == 1, "Training failed" - - n = RANDOM_FILE_PATHS.pop() - pdb.set_trace() - shutil.move(mlflow_dir, mlflow_dir + f'_{n}') - - -def test_mlflow_pickle(): - """ - verify that pickling trainer with mlflow logger works - """ - reset_seed() - - try: - from pytorch_lightning.logging import MLFlowLogger - except ModuleNotFoundError: - return - - hparams = get_hparams() - model = LightningTestModel(hparams) - - root_dir = os.path.dirname(os.path.realpath(__file__)) - mlflow_dir = os.path.join(root_dir, "mlruns") - - logger = MLFlowLogger("test", f"file://{mlflow_dir}") - logger.log_hyperparams(hparams) - logger.save() - - trainer_options = dict( - max_nb_epochs=1, - logger=logger - ) - - trainer = Trainer(**trainer_options) - pkl_bytes = pickle.dumps(trainer) - trainer2 = pickle.loads(pkl_bytes) - trainer2.logger.log_metrics({"acc": 1.0}) - - n = RANDOM_FILE_PATHS.pop() - shutil.move(mlflow_dir, mlflow_dir + f'_{n}') +# def test_mlflow_logger(): +# """ +# verify that basic functionality of mlflow logger works +# """ +# reset_seed() +# +# try: +# from pytorch_lightning.logging import MLFlowLogger +# except ModuleNotFoundError: +# return +# +# hparams = get_hparams() +# model = LightningTestModel(hparams) +# +# root_dir = os.path.dirname(os.path.realpath(__file__)) +# mlflow_dir = os.path.join(root_dir, "mlruns") +# import pdb +# pdb.set_trace() +# +# logger = MLFlowLogger("test", f"file://{mlflow_dir}") +# logger.log_hyperparams(hparams) +# logger.save() +# +# trainer_options = dict( +# max_nb_epochs=1, +# train_percent_check=0.01, +# logger=logger +# ) +# +# trainer = Trainer(**trainer_options) +# result = trainer.fit(model) +# +# print('result finished') +# assert result == 1, "Training failed" +# +# shutil.move(mlflow_dir, mlflow_dir + f'_{n}') + + +# def test_mlflow_pickle(): +# """ +# verify that pickling trainer with mlflow logger works +# """ +# reset_seed() +# +# try: +# from pytorch_lightning.logging import MLFlowLogger +# except ModuleNotFoundError: +# return +# +# hparams = get_hparams() +# model = LightningTestModel(hparams) +# +# root_dir = os.path.dirname(os.path.realpath(__file__)) +# mlflow_dir = os.path.join(root_dir, "mlruns") +# +# logger = MLFlowLogger("test", f"file://{mlflow_dir}") +# logger.log_hyperparams(hparams) +# logger.save() +# +# trainer_options = dict( +# max_nb_epochs=1, +# logger=logger +# ) +# +# trainer = Trainer(**trainer_options) +# pkl_bytes = pickle.dumps(trainer) +# trainer2 = pickle.loads(pkl_bytes) +# trainer2.logger.log_metrics({"acc": 1.0}) +# +# n = RANDOM_FILE_PATHS.pop() +# shutil.move(mlflow_dir, mlflow_dir + f'_{n}') def test_custom_logger(): From 5da0f08f72e4471ed56b490a83429088b5a1a0a9 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 18 Oct 2019 22:22:33 +0200 Subject: [PATCH 08/15] changes to seed for tests --- tests/test_logging.py | 76 +++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/tests/test_logging.py b/tests/test_logging.py index 7460dcfd2c9aa..cc3489e8448a0 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -146,44 +146,44 @@ def test_testtube_pickle(): # shutil.move(mlflow_dir, mlflow_dir + f'_{n}') -def test_custom_logger(): - - class CustomLogger(LightningLoggerBase): - def __init__(self): - super().__init__() - self.hparams_logged = None - self.metrics_logged = None - self.finalized = False - - @rank_zero_only - def log_hyperparams(self, params): - self.hparams_logged = params - - @rank_zero_only - def log_metrics(self, metrics, step_num): - self.metrics_logged = metrics - - @rank_zero_only - def finalize(self, status): - self.finalized_status = status - - hparams = get_hparams() - model = LightningTestModel(hparams) - - logger = CustomLogger() - - trainer_options = dict( - max_nb_epochs=1, - train_percent_check=0.01, - logger=logger - ) - - trainer = Trainer(**trainer_options) - result = trainer.fit(model) - assert result == 1, "Training failed" - assert logger.hparams_logged == hparams - assert logger.metrics_logged != {} - assert logger.finalized_status == "success" +# def test_custom_logger(): +# +# class CustomLogger(LightningLoggerBase): +# def __init__(self): +# super().__init__() +# self.hparams_logged = None +# self.metrics_logged = None +# self.finalized = False +# +# @rank_zero_only +# def log_hyperparams(self, params): +# self.hparams_logged = params +# +# @rank_zero_only +# def log_metrics(self, metrics, step_num): +# self.metrics_logged = metrics +# +# @rank_zero_only +# def finalize(self, status): +# self.finalized_status = status +# +# hparams = get_hparams() +# model = LightningTestModel(hparams) +# +# logger = CustomLogger() +# +# trainer_options = dict( +# max_nb_epochs=1, +# train_percent_check=0.01, +# logger=logger +# ) +# +# trainer = Trainer(**trainer_options) +# result = trainer.fit(model) +# assert result == 1, "Training failed" +# assert logger.hparams_logged == hparams +# assert logger.metrics_logged != {} +# assert logger.finalized_status == "success" def reset_seed(): From 0663c225c48d8bc07586b373892e6735b3655647 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 18 Oct 2019 22:24:12 +0200 Subject: [PATCH 09/15] changes to seed for tests --- tests/test_models.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index 249769998f041..55259acb7d7fe 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -49,8 +49,6 @@ def test_early_stopping_cpu_model(): :return: """ reset_seed() - import pdb - pdb.set_trace() stopping = EarlyStopping(monitor='val_loss') trainer_options = dict( From e67cb5ab4812ebe47e845cdd51090f58dad14a25 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 18 Oct 2019 22:34:34 +0200 Subject: [PATCH 10/15] changes to seed for tests --- tests/test_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index 55259acb7d7fe..fa8a58be0ebb1 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -130,7 +130,7 @@ def test_lbfgs_cpu_model(): reset_seed() trainer_options = dict( - max_nb_epochs=2, + max_nb_epochs=1, print_nan_grads=True, show_progress_bar=False, weights_summary='top', @@ -139,7 +139,7 @@ def test_lbfgs_cpu_model(): ) model, hparams = get_model(use_test_model=True, lbfgs=True) - run_model_test_no_loggers(trainer_options, model, hparams, on_gpu=False, min_acc=0.40) + run_model_test_no_loggers(trainer_options, model, hparams, on_gpu=False, min_acc=0.30) clear_save_dir() From 2676a35111b4de55d3b0f171566e67ab3428ea2b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 18 Oct 2019 22:35:01 +0200 Subject: [PATCH 11/15] changes to seed for tests --- .run_local_tests.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.run_local_tests.sh b/.run_local_tests.sh index 831a3a554384e..460b437b31dcc 100644 --- a/.run_local_tests.sh +++ b/.run_local_tests.sh @@ -2,5 +2,6 @@ rm -rf _ckpt_* rm -rf tests/save_dir* rm -rf tests/mlruns_* +rm -rf tests/tests/* coverage run --source pytorch_lightning -m py.test pytorch_lightning tests examples -v --doctest-modules coverage report -m From 313749949ccb97b1ff900a611ae56288fd3cd0e1 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 18 Oct 2019 22:41:01 +0200 Subject: [PATCH 12/15] changes to seed for tests --- tests/test_models.py | 50 ++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/tests/test_models.py b/tests/test_models.py index fa8a58be0ebb1..f76e5e921d27e 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -43,6 +43,31 @@ # ------------------------------------------------------------------------ # TESTS # ------------------------------------------------------------------------ +def test_multi_gpu_model_ddp2(): + """ + Make sure DDP2 works + :return: + """ + if not can_run_gpu_test(): + return + + reset_seed() + set_random_master_port() + + model, hparams = get_model() + trainer_options = dict( + show_progress_bar=True, + max_nb_epochs=1, + train_percent_check=0.4, + val_percent_check=0.2, + gpus=2, + weights_summary=None, + distributed_backend='ddp2' + ) + + run_gpu_model_test(trainer_options, model, hparams) + + def test_early_stopping_cpu_model(): """ Test each of the trainer options @@ -171,31 +196,6 @@ def test_default_logger_callbacks_cpu_model(): clear_save_dir() -def test_multi_gpu_model_ddp2(): - """ - Make sure DDP2 works - :return: - """ - if not can_run_gpu_test(): - return - - reset_seed() - set_random_master_port() - - model, hparams = get_model() - trainer_options = dict( - show_progress_bar=True, - max_nb_epochs=1, - train_percent_check=0.4, - val_percent_check=0.2, - gpus=2, - weights_summary=None, - distributed_backend='ddp2' - ) - - run_gpu_model_test(trainer_options, model, hparams) - - def test_dp_resume(): """ Make sure DP continues training correctly From 1e592203a8e8836b8d9b9a30504edbbf3187e372 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 18 Oct 2019 22:43:39 +0200 Subject: [PATCH 13/15] changes to seed for tests --- pytorch_lightning/trainer/trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 3b4fe2c5a3e40..d2c41f848cc1a 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1550,5 +1550,6 @@ def __run_evaluation(self, test=False): # model checkpointing if self.proc_rank == 0 and self.checkpoint_callback is not None and not test: + pdb.set_trace() self.checkpoint_callback.on_epoch_end(epoch=self.current_epoch, logs=self.callback_metrics) From 3939c506613c5641359f4179d9c0b5a8e240611a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 18 Oct 2019 22:46:22 +0200 Subject: [PATCH 14/15] changes to seed for tests --- pytorch_lightning/trainer/trainer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index d2c41f848cc1a..0f1e66c82207b 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1368,6 +1368,11 @@ def __process_output(self, output, train=False): callback_metrics.update(progress_bar_metrics) callback_metrics.update(log_metrics) + # convert tensors to numpy + for k, v in callback_metrics: + if isinstance(v, torch.Tensor): + callback_metrics[k] = v.item() + return loss, progress_bar_metrics, log_metrics, callback_metrics def __clip_gradients(self): @@ -1550,6 +1555,5 @@ def __run_evaluation(self, test=False): # model checkpointing if self.proc_rank == 0 and self.checkpoint_callback is not None and not test: - pdb.set_trace() self.checkpoint_callback.on_epoch_end(epoch=self.current_epoch, logs=self.callback_metrics) From 59840eb177fe27942a03f52744376c9b289fbfca Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 18 Oct 2019 22:47:32 +0200 Subject: [PATCH 15/15] changes to seed for tests --- pytorch_lightning/trainer/trainer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 0f1e66c82207b..984dbdcd8fa21 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1369,7 +1369,7 @@ def __process_output(self, output, train=False): callback_metrics.update(log_metrics) # convert tensors to numpy - for k, v in callback_metrics: + for k, v in callback_metrics.items(): if isinstance(v, torch.Tensor): callback_metrics[k] = v.item()