From ee20b83349fb847ad4a7d1c69d9cbce084ce28be Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 15:12:54 -0500 Subject: [PATCH 001/155] updated gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 654e70b1cbc1a..43541c9dcbe80 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,8 @@ tests/save_dir default/ lightning_logs/ tests/tests/ +*.rst +/docs/source/*.md # Byte-compiled / optimized / DLL files __pycache__/ From 2916a05f72ccc23a378ccecdb8a916b0b63b1aef Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 15:12:54 -0500 Subject: [PATCH 002/155] updated gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 654e70b1cbc1a..43541c9dcbe80 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,8 @@ tests/save_dir default/ lightning_logs/ tests/tests/ +*.rst +/docs/source/*.md # Byte-compiled / optimized / DLL files __pycache__/ From 8efaba1591c2cba15a801e254a4b9c91f70c6c88 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 18:35:01 -0500 Subject: [PATCH 003/155] updated links in ninja file --- docs/source/_templates/theme_variables.jinja | 24 ++++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/source/_templates/theme_variables.jinja b/docs/source/_templates/theme_variables.jinja index 4982f35867a49..3a67ad64d384d 100644 --- a/docs/source/_templates/theme_variables.jinja +++ b/docs/source/_templates/theme_variables.jinja @@ -1,17 +1,17 @@ {%- set external_urls = { - 'github': 'https://github.com/williamFalcon/pytorch-lightning', - 'github_issues': 'https://github.com/williamFalcon/pytorch-lightning/issues', - 'contributing': 'https://github.com/williamFalcon/pytorch-lightning/blob/master/CONTRIBUTING.md', - 'docs': 'https://williamfalcon.github.io/pytorch-lightning', + 'github': 'https://github.com/PytorchLightning/pytorch-lightning', + 'github_issues': 'https://github.com/PytorchLightning/pytorch-lightning/issues', + 'contributing': 'https://github.com/PytorchLightning/pytorch-lightning/blob/master/CONTRIBUTING.md', + 'docs': 'https://pytorchlightning.github.io/pytorch-lightning', 'twitter': 'https://twitter.com/PyTorchLightnin', 'discuss': 'https://discuss.pytorch.org', - 'tutorials': 'https://williamfalcon.github.io/pytorch-lightning/', - 'previous_pytorch_versions': 'https://williamfalcon.github.io/pytorch-lightning/', - 'home': 'https://williamfalcon.github.io/pytorch-lightning/', - 'get_started': 'https://williamfalcon.github.io/pytorch-lightning/', - 'features': 'https://williamfalcon.github.io/pytorch-lightning/', - 'blog': 'https://williamfalcon.github.io/pytorch-lightning/', - 'resources': 'https://williamfalcon.github.io/pytorch-lightning/', - 'support': 'https://williamfalcon.github.io/pytorch-lightning/', + 'tutorials': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'previous_pytorch_versions': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'home': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'get_started': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'features': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'blog': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'resources': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'support': 'https://pytorchlightning.github.io/pytorch-lightning/', } -%} From f3d517deb5146dbb8d772817397e9fed354edd93 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 
Jan 2020 19:44:02 -0500 Subject: [PATCH 004/155] updated docs --- docs/source/conf.py | 1 + docs/source/index.rst | 8 ++++---- docs/source/new-project.rst | 18 +++++++++++------- pytorch_lightning/trainer/__init__.py | 6 +++++- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index da0774a1d864d..fa3558ab7968c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -83,6 +83,7 @@ 'sphinx.ext.autosummary', 'sphinx.ext.napoleon', 'recommonmark', + 'sphinx.ext.autosectionlabel', # 'm2r', 'nbsphinx', ] diff --git a/docs/source/index.rst b/docs/source/index.rst index 0dd7a6af9f681..2fde388cd8d4b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -3,13 +3,13 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to PyTorch-Lightning! +PyTorch-Lightning Documentation ============================= .. toctree:: - :maxdepth: 4 + :maxdepth: 1 :name: start - :caption: Quick Start + :caption: Start Here new-project examples @@ -17,7 +17,7 @@ Welcome to PyTorch-Lightning! .. toctree:: :maxdepth: 4 :name: docs - :caption: Docs + :caption: Python API documentation diff --git a/docs/source/new-project.rst b/docs/source/new-project.rst index 448e7e3817090..3bf8e3d45d945 100644 --- a/docs/source/new-project.rst +++ b/docs/source/new-project.rst @@ -1,13 +1,13 @@ Quick Start =========== -To start a new project define two files, a LightningModule and a Trainer file. -To illustrate Lightning power and simplicity, here's an example of a typical research flow. +| To start a new project define two files, a LightningModule and a Trainer file. +| To illustrate the power of Lightning and its simplicity, here's an example of a typical research flow. Case 1: BERT ------------ -Let's say you're working on something like BERT but want to try different ways of training or even different networks. -You would define a single LightningModule and use flags to switch between your different ideas. +| Let's say you're working on something like BERT but want to try different ways of training or even different networks. +| You would define a single LightningModule and use flags to switch between your different ideas. .. code-block:: python @@ -66,6 +66,10 @@ Then you could do rapid research by switching between these two and using the sa **Notice a few things about this flow:** -1. You're writing pure PyTorch... no unnecessary abstractions or new libraries to learn. -2. You get free GPU and 16-bit support without writing any of that code in your model. -3. You also get all of the capabilities below (without coding or testing yourself). +1. You're writing pure PyTorch... no unnecessary abstractions or new libraries to learn. +2. You get free GPU and 16-bit support without writing any of that code in your model. +3. You also get all of the capabilities below (without coding or testing yourself). + +- :ref:`Examples & Tutorials` +- :ref:`Examples & Tutorials` +- :ref:`Examples & Tutorials` diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 88087a24f2d65..96254c263c4a9 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -16,4 +16,8 @@ trainer = Trainer() trainer.fit(model) -""" +The Trainer holds all the engineering code you might need such as distributing over GPUs or early stopping. +The LightningTemplate holds the core computations, train, val, test loop, optimizer and dataloaders. 
+ +This pattern de-couples the engineering from the science which makes your code reusable and free to run on any hardware. +""" \ No newline at end of file From 7003f74751cf9a08bbb1b51ce8fe23a58156e9e0 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 20:36:57 -0500 Subject: [PATCH 005/155] finished callbacks --- docs/source/new-project.rst | 2 +- pytorch_lightning/callbacks/pt_callbacks.py | 114 +++++++++++--------- 2 files changed, 67 insertions(+), 49 deletions(-) diff --git a/docs/source/new-project.rst b/docs/source/new-project.rst index 3bf8e3d45d945..62de6879ae0b0 100644 --- a/docs/source/new-project.rst +++ b/docs/source/new-project.rst @@ -70,6 +70,6 @@ Then you could do rapid research by switching between these two and using the sa 2. You get free GPU and 16-bit support without writing any of that code in your model. 3. You also get all of the capabilities below (without coding or testing yourself). -- :ref:`Examples & Tutorials` +- :ref:`Callbacks` - :ref:`Examples & Tutorials` - :ref:`Examples & Tutorials` diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index 1951719877199..dc4158f85013c 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ b/pytorch_lightning/callbacks/pt_callbacks.py @@ -1,3 +1,9 @@ +""" +Callbacks +==================================== +Callbacks supported by Lightning +""" + import os import shutil import logging @@ -8,26 +14,7 @@ class Callback(object): - """Abstract base class used to build new callbacks. - - # Properties - * params: dict. Training parameters - (eg. verbosity, batch size, number of epochs...). - Reference of the model being trained. - - The `logs` dictionary that callback methods take as argument will contain keys - for quantities relevant to the current batch or epoch. - Currently, the `.fit()` method of the `Sequential` model class will include the following - quantities in the `logs` that it passes to its callbacks: - * on_epoch_end: logs include `acc` and `loss`, and - optionally include `val_loss` - (if validation is enabled in `fit`), and `val_acc` - (if validation and accuracy monitoring are enabled). - * on_batch_begin: logs include `size`, - the number of samples in the current batch. - * on_batch_end: logs include `loss`, and optionally `acc` - (if accuracy monitoring is enabled). - + r"""Abstract base class used to build new callbacks. """ def __init__(self): @@ -43,12 +30,29 @@ def set_model(self, model): self.model = model def on_epoch_begin(self, epoch, logs=None): + r""" + called when the epoch begins + + Args: + epoch (int): current epoch + logs (dict): key-value pairs of quantities to monitor + + Example: + >>> on_epoch_begin(epoch=2, logs={'val_loss': 0.2}) + """ pass def on_epoch_end(self, epoch, logs=None): pass def on_batch_begin(self, batch, logs=None): + r""" + called when the batch starts. + + Args: + batch (Tensor): current batch tensor + logs (dict): key-value pairs of quantities to monitor + """ pass def on_batch_end(self, batch, logs=None): @@ -62,25 +66,28 @@ def on_train_end(self, logs=None): class EarlyStopping(Callback): - """Stop training when a monitored quantity has stopped improving. + r""" + Stop training when a monitored quantity has stopped improving. - # Arguments - monitor: quantity to be monitored. - min_delta: minimum change in the monitored quantity + Args: + monitor (str): quantity to be monitored. + min_delta (float): minimum change in the monitored quantity to qualify as an improvement, i.e. 
an absolute change of less than min_delta, will count as no improvement. - patience: number of epochs with no improvement + patience (int): number of epochs with no improvement after which training will be stopped. - verbose: verbosity mode. - mode: one of {auto, min, max}. In `min` mode, + verbose (bool): verbosity mode. + mode (str): one of {auto, min, max}. In `min` mode, training will stop when the quantity monitored has stopped decreasing; in `max` mode it will stop when the quantity monitored has stopped increasing; in `auto` mode, the direction is automatically inferred from the name of the monitored quantity. - + Example: + >>> from pytorch_lightning.callbacks import EarlyStopping + >>> EarlyStopping('val_loss') """ def __init__(self, monitor='val_loss', @@ -150,20 +157,21 @@ def on_train_end(self, logs=None): class ModelCheckpoint(Callback): - """Save the model after every epoch. - - The `filepath` can contain named formatting options, - which will be filled the value of `epoch` and - keys in `logs` (passed in `on_epoch_end`). - For example: if `filepath` is `weights.{epoch:02d}-{val_loss:.2f}.hdf5`, - then the model checkpoints will be saved with the epoch number and - the validation loss in the filename. - - # Arguments - filepath: string, path to save the model file. - monitor: quantity to monitor. - verbose: verbosity mode, 0 or 1. - save_top_k: if `save_top_k == k`, + r""" + + Save the model after every epoch. + + Args: + filepath (str): path to save the model file. + Can contain named formatting options to be auto-filled. + + Example: + >>> # save epoch and val_loss in name + >>> ModelCheckpoint(filepath='{epoch:02d}-{val_loss:.2f}.hdf5') + >>> # saves file like: /path/epoch_2-val_loss_0.2.hdf5 + monitor (str): quantity to monitor. + verbose (bool): verbosity mode, 0 or 1. + save_top_k (int): if `save_top_k == k`, the best k models according to the quantity monitored will be saved. if `save_top_k == 0`, no models are saved. @@ -172,7 +180,7 @@ class ModelCheckpoint(Callback): if `save_top_k >= 2` and the callback is called multiple times inside an epoch, the name of the saved file will be appended with a version count starting with `v0`. - mode: one of {auto, min, max}. + mode (str): one of {auto, min, max}. If `save_top_k != 0`, the decision to overwrite the current save file is made based on either the maximization or the @@ -180,11 +188,16 @@ class ModelCheckpoint(Callback): this should be `max`, for `val_loss` this should be `min`, etc. In `auto` mode, the direction is automatically inferred from the name of the monitored quantity. - save_weights_only: if True, then only the model's weights will be + save_weights_only (bool): if True, then only the model's weights will be saved (`model.save_weights(filepath)`), else the full model is saved (`model.save(filepath)`). - period: Interval (number of epochs) between checkpoints. + period (int): Interval (number of epochs) between checkpoints. + Example: + >>> from pytorch_lightning.callbacks import ModelCheckpoint + >>> ModelCheckpoint(filepath='my_path') + >>> # saves checkpoints to my_path whenever 'val_loss' has a new min + """ def __init__(self, filepath, monitor='val_loss', verbose=0, @@ -330,11 +343,16 @@ def on_epoch_end(self, epoch, logs=None): class GradientAccumulationScheduler(Callback): - """Change gradient accumulation factor according to scheduling. + r""" + Change gradient accumulation factor according to scheduling. 
- # Arguments - scheduling: dict, scheduling in format {epoch: accumulation_factor} + Args: + scheduling (dict): scheduling in format {epoch: accumulation_factor} + Example: + >>> from pytorch_lightning.callbacks import GradientAccumulationScheduler + >>> # at epoch 5 start accumulating every 2 batches + >>> GradientAccumulationScheduler(scheduling: {5: 2}) """ def __init__(self, scheduling: dict): From 592e087df17f0498ee5400d2d0e997fa6735dd21 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 20:50:46 -0500 Subject: [PATCH 006/155] finished callbacks --- pytorch_lightning/trainer/__init__.py | 4 +++- pytorch_lightning/trainer/auto_mix_precision.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 96254c263c4a9..e8b9dc05ec484 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -1,5 +1,7 @@ """ -# Trainer +Trainer +==================================== + The lightning trainer abstracts best practices for running a training, val, test routine. It calls parts of your model when it wants to hand over full control and otherwise makes diff --git a/pytorch_lightning/trainer/auto_mix_precision.py b/pytorch_lightning/trainer/auto_mix_precision.py index b28193c0bd12d..2915f2465fbb9 100644 --- a/pytorch_lightning/trainer/auto_mix_precision.py +++ b/pytorch_lightning/trainer/auto_mix_precision.py @@ -1,3 +1,4 @@ + from abc import ABC try: From 519f70edf02a69e0a07520df290545734cf1f288 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 20:57:04 -0500 Subject: [PATCH 007/155] finished callbacks --- pytorch_lightning/trainer/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index e8b9dc05ec484..e3ffd3e1f3f27 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -2,8 +2,7 @@ Trainer ==================================== - -The lightning trainer abstracts best practices for running a training, val, test routine. +The lightning Trainer abstracts best practices for running a training, val, test routine. It calls parts of your model when it wants to hand over full control and otherwise makes training assumptions which are now standard practice in AI research. From da721527a884314c62812af0223db5b3b9fd8821 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:17:16 -0500 Subject: [PATCH 008/155] fixed left menu --- docs/source/index.rst | 3 ++- pytorch_lightning/core/lightning.py | 5 +++++ pytorch_lightning/trainer/trainer.py | 5 +++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 2fde388cd8d4b..db1a051c48ec6 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -19,7 +19,8 @@ PyTorch-Lightning Documentation :name: docs :caption: Python API - documentation + trainer + lightning-module .. 
toctree:: :maxdepth: 1 diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 22fff33367d34..07af367fbe2aa 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1,3 +1,8 @@ +""" +LightningModule +==================================== + +""" import os import warnings import collections diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index a1133004bc448..8a768abd68694 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1,4 +1,7 @@ """ +AAA +==================================== + The trainer handles all the logic for running a val loop, training loop, distributing, etc.. . """ @@ -50,6 +53,8 @@ class Trainer(TrainerIOMixin, TrainerTrainLoopMixin, TrainerCallbackConfigMixin, ): + r"""Abstract base class used to build new callbacks. + """ def __init__( self, From 88c84dccb0c080d79e09bcf6397cf56ac5d69281 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:29:30 -0500 Subject: [PATCH 009/155] added callbacks to menu --- docs/source/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/index.rst b/docs/source/index.rst index db1a051c48ec6..5687013ce45e4 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -21,6 +21,7 @@ PyTorch-Lightning Documentation trainer lightning-module + callbacks .. toctree:: :maxdepth: 1 From 2f12f21f3401edb387bbbb1467a4c1e37bc931e3 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:36:43 -0500 Subject: [PATCH 010/155] added direct links to docs --- docs/source/index.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 5687013ce45e4..c219884b353c8 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -19,9 +19,10 @@ PyTorch-Lightning Documentation :name: docs :caption: Python API - trainer - lightning-module callbacks + lightning-module + logging + trainer .. toctree:: :maxdepth: 1 From 9bccb4ccddf7247f4aa2b33c2d68480e44e4003f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:43:04 -0500 Subject: [PATCH 011/155] added direct links to docs --- pytorch_lightning/core/lightning.py | 12 +++++++++--- pytorch_lightning/trainer/__init__.py | 3 --- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 07af367fbe2aa..536463566e126 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1,8 +1,14 @@ """ -LightningModule -==================================== - +The LightningModule is the "system recipe." It groups the following in one file: + - computational system definition + - computations done on forward + - training loop + - validation loop + - testing loop + - train, val, test dataloaders + - optimizers """ + import os import warnings import collections diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index e3ffd3e1f3f27..71138861096b9 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -1,7 +1,4 @@ """ -Trainer -==================================== - The lightning Trainer abstracts best practices for running a training, val, test routine. It calls parts of your model when it wants to hand over full control and otherwise makes training assumptions which are now standard practice in AI research. 
From d71342cb6145eab2a0de1c4f3f22576725b04b14 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:49:12 -0500 Subject: [PATCH 012/155] added direct links to docs --- pytorch_lightning/core/lightning.py | 2 -- pytorch_lightning/trainer/trainer.py | 6 ------ 2 files changed, 8 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 536463566e126..4a2b1fe7d77b3 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -80,8 +80,6 @@ class LightningModule(ABC, GradInformation, ModelIO, ModelHooks): def __init__(self): # put the dimensions of the first input to your system self.example_input_array = torch.rand(5, 28 * 28) - - """ def __init__(self, *args, **kwargs): diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 8a768abd68694..258b7cffe3aca 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1,9 +1,3 @@ -""" -AAA -==================================== - -The trainer handles all the logic for running a val loop, training loop, distributing, etc.. . -""" import os import sys From 937978f0d352a153b3b1a285bfecd6245249e5b0 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 22:01:11 -0500 Subject: [PATCH 013/155] added direct links to docs --- pytorch_lightning/callbacks/pt_callbacks.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index dc4158f85013c..55a41e7e2aa44 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ b/pytorch_lightning/callbacks/pt_callbacks.py @@ -86,8 +86,11 @@ class EarlyStopping(Callback): mode, the direction is automatically inferred from the name of the monitored quantity. Example: + >>> from pytorch_lightning import Trainer >>> from pytorch_lightning.callbacks import EarlyStopping - >>> EarlyStopping('val_loss') + >>> + >>> early_stopping = EarlyStopping('val_loss') + >>> Trainer(early_stop_callback=early_stopping) """ def __init__(self, monitor='val_loss', @@ -194,10 +197,13 @@ class ModelCheckpoint(Callback): period (int): Interval (number of epochs) between checkpoints. 
Example: + >>> from pytorch_lightning import Trainer >>> from pytorch_lightning.callbacks import ModelCheckpoint - >>> ModelCheckpoint(filepath='my_path') + >>> + >>> checkpoint_callback = ModelCheckpoint(filepath='my_path') + >>> Trainer(checkpoint_callback=checkpoint_callback) + >>> # saves checkpoints to my_path whenever 'val_loss' has a new min - """ def __init__(self, filepath, monitor='val_loss', verbose=0, @@ -350,9 +356,12 @@ class GradientAccumulationScheduler(Callback): scheduling (dict): scheduling in format {epoch: accumulation_factor} Example: + >>> from pytorch_lightning import Trainer >>> from pytorch_lightning.callbacks import GradientAccumulationScheduler + >>> >>> # at epoch 5 start accumulating every 2 batches - >>> GradientAccumulationScheduler(scheduling: {5: 2}) + >>> accumulator = GradientAccumulationScheduler(scheduling: {5: 2}) + >>> Trainer(accumulate_grad_batches=accumulator) """ def __init__(self, scheduling: dict): From 610edf8c3e24801e94a6672fcc02f9efe4b46a90 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 06:04:17 -0500 Subject: [PATCH 014/155] added direct links to docs --- pytorch_lightning/trainer/__init__.py | 5 +- pytorch_lightning/trainer/trainer.py | 95 ++++++++++++++------------- 2 files changed, 54 insertions(+), 46 deletions(-) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 71138861096b9..318ec82d218c0 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -1,4 +1,7 @@ """ +Trainer +==================================== + The lightning Trainer abstracts best practices for running a training, val, test routine. It calls parts of your model when it wants to hand over full control and otherwise makes training assumptions which are now standard practice in AI research. @@ -18,4 +21,4 @@ The LightningTemplate holds the core computations, train, val, test loop, optimizer and dataloaders. This pattern de-couples the engineering from the science which makes your code reusable and free to run on any hardware. -""" \ No newline at end of file +""" diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 258b7cffe3aca..24b6d8880ccbe 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1,4 +1,5 @@ + import os import sys import warnings @@ -47,7 +48,9 @@ class Trainer(TrainerIOMixin, TrainerTrainLoopMixin, TrainerCallbackConfigMixin, ): - r"""Abstract base class used to build new callbacks. + r""" + Main trainer class + """ def __init__( @@ -91,52 +94,54 @@ def __init__( truncated_bptt_steps=None, resume_from_checkpoint=None, ): + r""" + + Customize every aspect of training via flags """ + # Args: + # logger (ref:`Logger`): Logger for experiment tracking + # checkpoint_callback (ref:`Callback`): Callback for checkpointing + # :param early_stop_callback: Callback for early stopping + # :param str default_save_path: Default path for logs+weights if no logger/ckpt_callback passed + # :param int gradient_clip_val: 0 means don't clip. + # :param int gradient_clip: 0 means don't clip. Deprecated. + # :param process_position: shown in the tqdm bar + # :param int num_nodes: number of GPU nodes + # :param list|str|int gpus: int. 
(ie: 2 gpus) OR list to specify which GPUs [0, 1] OR '0,1' + # OR '-1' / -1 to use all available gpus + # :param str log_gpu_memory: None, 'min_max', 'all' + # :param bool show_progress_bar: If true shows tqdm bar + # :param float overfit_pct: uses this much of all datasets + # :param int track_grad_norm: -1 no tracking. Otherwise tracks that norm + # :param int check_val_every_n_epoch: check val every n train epochs + # :param bool fast_dev_run: runs full iteration over everything to find bugs + # :param int accumulate_grad_batches: Accumulates grads every k batches + # :param int max_epochs: + # :param int min_epochs: + # :param int train_percent_check: How much of train set to check + # :param int val_percent_check: How much of val set to check + # :param int test_percent_check: How much of test set to check + # :param float|int val_check_interval: If float, % of tng epoch. If int, check every n batch + # :param int log_save_interval: Writes logs to disk this often + # :param int row_log_interval: How often to add logging rows + # :param int add_row_log_interval: How often to add logging rows. Deprecated. + # :param str distributed_backend: Options: 'dp', 'ddp', 'ddp2'. + # :param bool use_amp: If true uses apex for 16bit precision + # :param bool print_nan_grads: Prints nan gradients + # :param str weights_summary: Options: 'full', 'top', None to not print. + # :param bool weights_save_path: Where to save weights if on cluster + # :param str amp_level: Check nvidia docs for level + # :param int num_sanity_val_steps: How many val steps before a full train loop. + # :param int truncated_bptt_steps: Enables multiple backward passes for each batch. + # + # .. warning:: Following arguments become deprecated and they will be removed in v0.8.0: + # - `gradient_clip`, + # - `nb_gpu_nodes`, + # - `max_nb_epochs`, + # - `min_nb_epochs`, + # - `add_row_log_interval`, + # - `nb_sanity_val_steps` - :param logger: Logger for experiment tracking - :param checkpoint_callback: Callback for checkpointing - :param early_stop_callback: Callback for early stopping - :param str default_save_path: Default path for logs+weights if no logger/ckpt_callback passed - :param int gradient_clip_val: 0 means don't clip. - :param int gradient_clip: 0 means don't clip. Deprecated. - :param process_position: shown in the tqdm bar - :param int num_nodes: number of GPU nodes - :param list|str|int gpus: int. (ie: 2 gpus) OR list to specify which GPUs [0, 1] OR '0,1' - OR '-1' / -1 to use all available gpus - :param str log_gpu_memory: None, 'min_max', 'all' - :param bool show_progress_bar: If true shows tqdm bar - :param float overfit_pct: uses this much of all datasets - :param int track_grad_norm: -1 no tracking. Otherwise tracks that norm - :param int check_val_every_n_epoch: check val every n train epochs - :param bool fast_dev_run: runs full iteration over everything to find bugs - :param int accumulate_grad_batches: Accumulates grads every k batches - :param int max_epochs: - :param int min_epochs: - :param int train_percent_check: How much of train set to check - :param int val_percent_check: How much of val set to check - :param int test_percent_check: How much of test set to check - :param float|int val_check_interval: If float, % of tng epoch. If int, check every n batch - :param int log_save_interval: Writes logs to disk this often - :param int row_log_interval: How often to add logging rows - :param int add_row_log_interval: How often to add logging rows. Deprecated. 
- :param str distributed_backend: Options: 'dp', 'ddp', 'ddp2'. - :param bool use_amp: If true uses apex for 16bit precision - :param bool print_nan_grads: Prints nan gradients - :param str weights_summary: Options: 'full', 'top', None to not print. - :param bool weights_save_path: Where to save weights if on cluster - :param str amp_level: Check nvidia docs for level - :param int num_sanity_val_steps: How many val steps before a full train loop. - :param int truncated_bptt_steps: Enables multiple backward passes for each batch. - - .. warning:: Following arguments become deprecated and they will be removed in v0.8.0: - - `gradient_clip`, - - `nb_gpu_nodes`, - - `max_nb_epochs`, - - `min_nb_epochs`, - - `add_row_log_interval`, - - `nb_sanity_val_steps` - - """ # Transfer params # Backward compatibility if nb_gpu_nodes is not None: From 9f9bf65edeab5e4442446e216f0806f5f1dd1931 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 07:25:16 -0500 Subject: [PATCH 015/155] added direct links to docs --- pytorch_lightning/trainer/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 318ec82d218c0..4993c20c0c49b 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -22,3 +22,7 @@ This pattern de-couples the engineering from the science which makes your code reusable and free to run on any hardware. """ + +from .trainer import Trainer + +__all__ = ['Trainer'] From a3e47e74f5a60178dc4107b92604e269e4b3f6c8 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 15:12:54 -0500 Subject: [PATCH 016/155] updated gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 654e70b1cbc1a..43541c9dcbe80 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,8 @@ tests/save_dir default/ lightning_logs/ tests/tests/ +*.rst +/docs/source/*.md # Byte-compiled / optimized / DLL files __pycache__/ From b844e284570802afd03905293fc862a4699e56d8 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 18:35:01 -0500 Subject: [PATCH 017/155] updated links in ninja file --- docs/source/_templates/theme_variables.jinja | 24 ++++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/source/_templates/theme_variables.jinja b/docs/source/_templates/theme_variables.jinja index 4982f35867a49..3a67ad64d384d 100644 --- a/docs/source/_templates/theme_variables.jinja +++ b/docs/source/_templates/theme_variables.jinja @@ -1,17 +1,17 @@ {%- set external_urls = { - 'github': 'https://github.com/williamFalcon/pytorch-lightning', - 'github_issues': 'https://github.com/williamFalcon/pytorch-lightning/issues', - 'contributing': 'https://github.com/williamFalcon/pytorch-lightning/blob/master/CONTRIBUTING.md', - 'docs': 'https://williamfalcon.github.io/pytorch-lightning', + 'github': 'https://github.com/PytorchLightning/pytorch-lightning', + 'github_issues': 'https://github.com/PytorchLightning/pytorch-lightning/issues', + 'contributing': 'https://github.com/PytorchLightning/pytorch-lightning/blob/master/CONTRIBUTING.md', + 'docs': 'https://pytorchlightning.github.io/pytorch-lightning', 'twitter': 'https://twitter.com/PyTorchLightnin', 'discuss': 'https://discuss.pytorch.org', - 'tutorials': 'https://williamfalcon.github.io/pytorch-lightning/', - 'previous_pytorch_versions': 'https://williamfalcon.github.io/pytorch-lightning/', - 'home': 
'https://williamfalcon.github.io/pytorch-lightning/', - 'get_started': 'https://williamfalcon.github.io/pytorch-lightning/', - 'features': 'https://williamfalcon.github.io/pytorch-lightning/', - 'blog': 'https://williamfalcon.github.io/pytorch-lightning/', - 'resources': 'https://williamfalcon.github.io/pytorch-lightning/', - 'support': 'https://williamfalcon.github.io/pytorch-lightning/', + 'tutorials': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'previous_pytorch_versions': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'home': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'get_started': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'features': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'blog': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'resources': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'support': 'https://pytorchlightning.github.io/pytorch-lightning/', } -%} From 0b416e96b0fb80bf95191d6fc491ba7969976918 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 19:44:02 -0500 Subject: [PATCH 018/155] updated docs --- docs/source/conf.py | 1 + docs/source/index.rst | 8 ++++---- docs/source/new-project.rst | 18 +++++++++++------- pytorch_lightning/trainer/__init__.py | 6 +++++- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index da0774a1d864d..fa3558ab7968c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -83,6 +83,7 @@ 'sphinx.ext.autosummary', 'sphinx.ext.napoleon', 'recommonmark', + 'sphinx.ext.autosectionlabel', # 'm2r', 'nbsphinx', ] diff --git a/docs/source/index.rst b/docs/source/index.rst index 0dd7a6af9f681..2fde388cd8d4b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -3,13 +3,13 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to PyTorch-Lightning! +PyTorch-Lightning Documentation ============================= .. toctree:: - :maxdepth: 4 + :maxdepth: 1 :name: start - :caption: Quick Start + :caption: Start Here new-project examples @@ -17,7 +17,7 @@ Welcome to PyTorch-Lightning! .. toctree:: :maxdepth: 4 :name: docs - :caption: Docs + :caption: Python API documentation diff --git a/docs/source/new-project.rst b/docs/source/new-project.rst index 448e7e3817090..3bf8e3d45d945 100644 --- a/docs/source/new-project.rst +++ b/docs/source/new-project.rst @@ -1,13 +1,13 @@ Quick Start =========== -To start a new project define two files, a LightningModule and a Trainer file. -To illustrate Lightning power and simplicity, here's an example of a typical research flow. +| To start a new project define two files, a LightningModule and a Trainer file. +| To illustrate the power of Lightning and its simplicity, here's an example of a typical research flow. Case 1: BERT ------------ -Let's say you're working on something like BERT but want to try different ways of training or even different networks. -You would define a single LightningModule and use flags to switch between your different ideas. +| Let's say you're working on something like BERT but want to try different ways of training or even different networks. +| You would define a single LightningModule and use flags to switch between your different ideas. .. code-block:: python @@ -66,6 +66,10 @@ Then you could do rapid research by switching between these two and using the sa **Notice a few things about this flow:** -1. You're writing pure PyTorch... 
no unnecessary abstractions or new libraries to learn. -2. You get free GPU and 16-bit support without writing any of that code in your model. -3. You also get all of the capabilities below (without coding or testing yourself). +1. You're writing pure PyTorch... no unnecessary abstractions or new libraries to learn. +2. You get free GPU and 16-bit support without writing any of that code in your model. +3. You also get all of the capabilities below (without coding or testing yourself). + +- :ref:`Examples & Tutorials` +- :ref:`Examples & Tutorials` +- :ref:`Examples & Tutorials` diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index d38d208d9408b..3721a7627862d 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -17,4 +17,8 @@ trainer = Trainer() trainer.fit(model) -""" +The Trainer holds all the engineering code you might need such as distributing over GPUs or early stopping. +The LightningTemplate holds the core computations, train, val, test loop, optimizer and dataloaders. + +This pattern de-couples the engineering from the science which makes your code reusable and free to run on any hardware. +""" \ No newline at end of file From 891991e77e9de4bb2b0d6d54f2477b21525547ea Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 20:36:57 -0500 Subject: [PATCH 019/155] finished callbacks --- docs/source/new-project.rst | 2 +- pytorch_lightning/callbacks/pt_callbacks.py | 114 +++++++++++--------- 2 files changed, 67 insertions(+), 49 deletions(-) diff --git a/docs/source/new-project.rst b/docs/source/new-project.rst index 3bf8e3d45d945..62de6879ae0b0 100644 --- a/docs/source/new-project.rst +++ b/docs/source/new-project.rst @@ -70,6 +70,6 @@ Then you could do rapid research by switching between these two and using the sa 2. You get free GPU and 16-bit support without writing any of that code in your model. 3. You also get all of the capabilities below (without coding or testing yourself). -- :ref:`Examples & Tutorials` +- :ref:`Callbacks` - :ref:`Examples & Tutorials` - :ref:`Examples & Tutorials` diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index 1951719877199..dc4158f85013c 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ b/pytorch_lightning/callbacks/pt_callbacks.py @@ -1,3 +1,9 @@ +""" +Callbacks +==================================== +Callbacks supported by Lightning +""" + import os import shutil import logging @@ -8,26 +14,7 @@ class Callback(object): - """Abstract base class used to build new callbacks. - - # Properties - * params: dict. Training parameters - (eg. verbosity, batch size, number of epochs...). - Reference of the model being trained. - - The `logs` dictionary that callback methods take as argument will contain keys - for quantities relevant to the current batch or epoch. - Currently, the `.fit()` method of the `Sequential` model class will include the following - quantities in the `logs` that it passes to its callbacks: - * on_epoch_end: logs include `acc` and `loss`, and - optionally include `val_loss` - (if validation is enabled in `fit`), and `val_acc` - (if validation and accuracy monitoring are enabled). - * on_batch_begin: logs include `size`, - the number of samples in the current batch. - * on_batch_end: logs include `loss`, and optionally `acc` - (if accuracy monitoring is enabled). - + r"""Abstract base class used to build new callbacks. 
""" def __init__(self): @@ -43,12 +30,29 @@ def set_model(self, model): self.model = model def on_epoch_begin(self, epoch, logs=None): + r""" + called when the epoch begins + + Args: + epoch (int): current epoch + logs (dict): key-value pairs of quantities to monitor + + Example: + >>> on_epoch_begin(epoch=2, logs={'val_loss': 0.2}) + """ pass def on_epoch_end(self, epoch, logs=None): pass def on_batch_begin(self, batch, logs=None): + r""" + called when the batch starts. + + Args: + batch (Tensor): current batch tensor + logs (dict): key-value pairs of quantities to monitor + """ pass def on_batch_end(self, batch, logs=None): @@ -62,25 +66,28 @@ def on_train_end(self, logs=None): class EarlyStopping(Callback): - """Stop training when a monitored quantity has stopped improving. + r""" + Stop training when a monitored quantity has stopped improving. - # Arguments - monitor: quantity to be monitored. - min_delta: minimum change in the monitored quantity + Args: + monitor (str): quantity to be monitored. + min_delta (float): minimum change in the monitored quantity to qualify as an improvement, i.e. an absolute change of less than min_delta, will count as no improvement. - patience: number of epochs with no improvement + patience (int): number of epochs with no improvement after which training will be stopped. - verbose: verbosity mode. - mode: one of {auto, min, max}. In `min` mode, + verbose (bool): verbosity mode. + mode (str): one of {auto, min, max}. In `min` mode, training will stop when the quantity monitored has stopped decreasing; in `max` mode it will stop when the quantity monitored has stopped increasing; in `auto` mode, the direction is automatically inferred from the name of the monitored quantity. - + Example: + >>> from pytorch_lightning.callbacks import EarlyStopping + >>> EarlyStopping('val_loss') """ def __init__(self, monitor='val_loss', @@ -150,20 +157,21 @@ def on_train_end(self, logs=None): class ModelCheckpoint(Callback): - """Save the model after every epoch. - - The `filepath` can contain named formatting options, - which will be filled the value of `epoch` and - keys in `logs` (passed in `on_epoch_end`). - For example: if `filepath` is `weights.{epoch:02d}-{val_loss:.2f}.hdf5`, - then the model checkpoints will be saved with the epoch number and - the validation loss in the filename. - - # Arguments - filepath: string, path to save the model file. - monitor: quantity to monitor. - verbose: verbosity mode, 0 or 1. - save_top_k: if `save_top_k == k`, + r""" + + Save the model after every epoch. + + Args: + filepath (str): path to save the model file. + Can contain named formatting options to be auto-filled. + + Example: + >>> # save epoch and val_loss in name + >>> ModelCheckpoint(filepath='{epoch:02d}-{val_loss:.2f}.hdf5') + >>> # saves file like: /path/epoch_2-val_loss_0.2.hdf5 + monitor (str): quantity to monitor. + verbose (bool): verbosity mode, 0 or 1. + save_top_k (int): if `save_top_k == k`, the best k models according to the quantity monitored will be saved. if `save_top_k == 0`, no models are saved. @@ -172,7 +180,7 @@ class ModelCheckpoint(Callback): if `save_top_k >= 2` and the callback is called multiple times inside an epoch, the name of the saved file will be appended with a version count starting with `v0`. - mode: one of {auto, min, max}. + mode (str): one of {auto, min, max}. 
If `save_top_k != 0`, the decision to overwrite the current save file is made based on either the maximization or the @@ -180,11 +188,16 @@ class ModelCheckpoint(Callback): this should be `max`, for `val_loss` this should be `min`, etc. In `auto` mode, the direction is automatically inferred from the name of the monitored quantity. - save_weights_only: if True, then only the model's weights will be + save_weights_only (bool): if True, then only the model's weights will be saved (`model.save_weights(filepath)`), else the full model is saved (`model.save(filepath)`). - period: Interval (number of epochs) between checkpoints. + period (int): Interval (number of epochs) between checkpoints. + Example: + >>> from pytorch_lightning.callbacks import ModelCheckpoint + >>> ModelCheckpoint(filepath='my_path') + >>> # saves checkpoints to my_path whenever 'val_loss' has a new min + """ def __init__(self, filepath, monitor='val_loss', verbose=0, @@ -330,11 +343,16 @@ def on_epoch_end(self, epoch, logs=None): class GradientAccumulationScheduler(Callback): - """Change gradient accumulation factor according to scheduling. + r""" + Change gradient accumulation factor according to scheduling. - # Arguments - scheduling: dict, scheduling in format {epoch: accumulation_factor} + Args: + scheduling (dict): scheduling in format {epoch: accumulation_factor} + Example: + >>> from pytorch_lightning.callbacks import GradientAccumulationScheduler + >>> # at epoch 5 start accumulating every 2 batches + >>> GradientAccumulationScheduler(scheduling: {5: 2}) """ def __init__(self, scheduling: dict): From 24897506a076e64f7896552d8845f9dfad7dad7e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 20:50:46 -0500 Subject: [PATCH 020/155] finished callbacks --- pytorch_lightning/trainer/__init__.py | 1 - pytorch_lightning/trainer/auto_mix_precision.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 3721a7627862d..232e9a1c876ee 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -1,6 +1,5 @@ """ Trainer -======= The lightning trainer abstracts best practices for running a training, val, test routine. It calls parts of your model when it wants to hand over full control and otherwise makes diff --git a/pytorch_lightning/trainer/auto_mix_precision.py b/pytorch_lightning/trainer/auto_mix_precision.py index b28193c0bd12d..2915f2465fbb9 100644 --- a/pytorch_lightning/trainer/auto_mix_precision.py +++ b/pytorch_lightning/trainer/auto_mix_precision.py @@ -1,3 +1,4 @@ + from abc import ABC try: From c6c67a34ad6a52ab3bf08ef59362439e88e336e3 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 20:57:04 -0500 Subject: [PATCH 021/155] finished callbacks --- pytorch_lightning/trainer/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 232e9a1c876ee..8cb5aed318ab7 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -1,7 +1,4 @@ """ -Trainer - -The lightning trainer abstracts best practices for running a training, val, test routine. It calls parts of your model when it wants to hand over full control and otherwise makes training assumptions which are now standard practice in AI research. 
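(Illustrative aside, not part of the patch series: the callback docstrings added above show each callback in isolation. A combined sketch of wiring them into the Trainer follows, mirroring the docstring examples; the monitored key, patience, and filepath are placeholder choices, and the scheduling argument is written in keyword form.)

.. code-block:: python

    # Sketch only: wiring the callbacks documented above into a Trainer.
    # Argument names follow the docstrings in pt_callbacks.py; the paths,
    # patience and monitored key are placeholder choices.
    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks import (
        EarlyStopping,
        GradientAccumulationScheduler,
        ModelCheckpoint,
    )

    early_stopping = EarlyStopping(monitor='val_loss', patience=3)
    checkpoint_callback = ModelCheckpoint(filepath='my_path', monitor='val_loss',
                                          save_top_k=1)
    # keyword form of the docstring example: accumulate every 2 batches from epoch 5
    accumulator = GradientAccumulationScheduler(scheduling={5: 2})

    # passed to the Trainer exactly as the docstring examples above show
    trainer = Trainer(early_stop_callback=early_stopping,
                      checkpoint_callback=checkpoint_callback,
                      accumulate_grad_batches=accumulator)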
From f9285787a18cdaf7e59671a5a55d2de6993b9fd1 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:17:16 -0500 Subject: [PATCH 022/155] fixed left menu --- docs/source/index.rst | 3 ++- pytorch_lightning/core/lightning.py | 5 +++++ pytorch_lightning/trainer/trainer.py | 5 +++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 2fde388cd8d4b..db1a051c48ec6 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -19,7 +19,8 @@ PyTorch-Lightning Documentation :name: docs :caption: Python API - documentation + trainer + lightning-module .. toctree:: :maxdepth: 1 diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 3fca3968454ed..d22b6ed176fca 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1,3 +1,8 @@ +""" +LightningModule +==================================== + +""" import os import warnings import collections diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index a1133004bc448..8a768abd68694 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1,4 +1,7 @@ """ +AAA +==================================== + The trainer handles all the logic for running a val loop, training loop, distributing, etc.. . """ @@ -50,6 +53,8 @@ class Trainer(TrainerIOMixin, TrainerTrainLoopMixin, TrainerCallbackConfigMixin, ): + r"""Abstract base class used to build new callbacks. + """ def __init__( self, From d45f091a91246094cf1a14a55298e1b12d742441 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:29:30 -0500 Subject: [PATCH 023/155] added callbacks to menu --- docs/source/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/index.rst b/docs/source/index.rst index db1a051c48ec6..5687013ce45e4 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -21,6 +21,7 @@ PyTorch-Lightning Documentation trainer lightning-module + callbacks .. toctree:: :maxdepth: 1 From 4b08974d4d02e20c63505f0c41114ec8b0f4d350 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:36:43 -0500 Subject: [PATCH 024/155] added direct links to docs --- docs/source/index.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 5687013ce45e4..c219884b353c8 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -19,9 +19,10 @@ PyTorch-Lightning Documentation :name: docs :caption: Python API - trainer - lightning-module callbacks + lightning-module + logging + trainer .. toctree:: :maxdepth: 1 From d34de3890e269de1adbe53aaa756ed953808629b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:43:04 -0500 Subject: [PATCH 025/155] added direct links to docs --- pytorch_lightning/core/lightning.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index d22b6ed176fca..03c6a162f87c3 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1,8 +1,14 @@ """ -LightningModule -==================================== - +The LightningModule is the "system recipe." 
It groups the following in one file: + - computational system definition + - computations done on forward + - training loop + - validation loop + - testing loop + - train, val, test dataloaders + - optimizers """ + import os import warnings import collections From 31a3854e6ad6b190c21fb993cdbdf0672c6a5669 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:49:12 -0500 Subject: [PATCH 026/155] added direct links to docs --- pytorch_lightning/core/lightning.py | 2 -- pytorch_lightning/trainer/trainer.py | 6 ------ 2 files changed, 8 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 03c6a162f87c3..95345aadf7495 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -80,8 +80,6 @@ class LightningModule(ABC, GradInformation, ModelIO, ModelHooks): def __init__(self): # put the dimensions of the first input to your system self.example_input_array = torch.rand(5, 28 * 28) - - """ def __init__(self, *args, **kwargs): diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 8a768abd68694..258b7cffe3aca 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1,9 +1,3 @@ -""" -AAA -==================================== - -The trainer handles all the logic for running a val loop, training loop, distributing, etc.. . -""" import os import sys From 5a1ca83570e6ccbae38e753912ab46e0b5cbb5f4 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 22:01:11 -0500 Subject: [PATCH 027/155] added direct links to docs --- pytorch_lightning/callbacks/pt_callbacks.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index dc4158f85013c..55a41e7e2aa44 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ b/pytorch_lightning/callbacks/pt_callbacks.py @@ -86,8 +86,11 @@ class EarlyStopping(Callback): mode, the direction is automatically inferred from the name of the monitored quantity. Example: + >>> from pytorch_lightning import Trainer >>> from pytorch_lightning.callbacks import EarlyStopping - >>> EarlyStopping('val_loss') + >>> + >>> early_stopping = EarlyStopping('val_loss') + >>> Trainer(early_stop_callback=early_stopping) """ def __init__(self, monitor='val_loss', @@ -194,10 +197,13 @@ class ModelCheckpoint(Callback): period (int): Interval (number of epochs) between checkpoints. 
Example: + >>> from pytorch_lightning import Trainer >>> from pytorch_lightning.callbacks import ModelCheckpoint - >>> ModelCheckpoint(filepath='my_path') + >>> + >>> checkpoint_callback = ModelCheckpoint(filepath='my_path') + >>> Trainer(checkpoint_callback=checkpoint_callback) + >>> # saves checkpoints to my_path whenever 'val_loss' has a new min - """ def __init__(self, filepath, monitor='val_loss', verbose=0, @@ -350,9 +356,12 @@ class GradientAccumulationScheduler(Callback): scheduling (dict): scheduling in format {epoch: accumulation_factor} Example: + >>> from pytorch_lightning import Trainer >>> from pytorch_lightning.callbacks import GradientAccumulationScheduler + >>> >>> # at epoch 5 start accumulating every 2 batches - >>> GradientAccumulationScheduler(scheduling: {5: 2}) + >>> accumulator = GradientAccumulationScheduler(scheduling: {5: 2}) + >>> Trainer(accumulate_grad_batches=accumulator) """ def __init__(self, scheduling: dict): From 637f2344de592ab2a880af32d7374905b98836e5 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 06:04:17 -0500 Subject: [PATCH 028/155] added direct links to docs --- pytorch_lightning/trainer/__init__.py | 2 +- pytorch_lightning/trainer/trainer.py | 95 ++++++++++++++------------- 2 files changed, 51 insertions(+), 46 deletions(-) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 8cb5aed318ab7..30458845fb68f 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -17,4 +17,4 @@ The LightningTemplate holds the core computations, train, val, test loop, optimizer and dataloaders. This pattern de-couples the engineering from the science which makes your code reusable and free to run on any hardware. -""" \ No newline at end of file +""" diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 258b7cffe3aca..24b6d8880ccbe 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1,4 +1,5 @@ + import os import sys import warnings @@ -47,7 +48,9 @@ class Trainer(TrainerIOMixin, TrainerTrainLoopMixin, TrainerCallbackConfigMixin, ): - r"""Abstract base class used to build new callbacks. + r""" + Main trainer class + """ def __init__( @@ -91,52 +94,54 @@ def __init__( truncated_bptt_steps=None, resume_from_checkpoint=None, ): + r""" + + Customize every aspect of training via flags """ + # Args: + # logger (ref:`Logger`): Logger for experiment tracking + # checkpoint_callback (ref:`Callback`): Callback for checkpointing + # :param early_stop_callback: Callback for early stopping + # :param str default_save_path: Default path for logs+weights if no logger/ckpt_callback passed + # :param int gradient_clip_val: 0 means don't clip. + # :param int gradient_clip: 0 means don't clip. Deprecated. + # :param process_position: shown in the tqdm bar + # :param int num_nodes: number of GPU nodes + # :param list|str|int gpus: int. (ie: 2 gpus) OR list to specify which GPUs [0, 1] OR '0,1' + # OR '-1' / -1 to use all available gpus + # :param str log_gpu_memory: None, 'min_max', 'all' + # :param bool show_progress_bar: If true shows tqdm bar + # :param float overfit_pct: uses this much of all datasets + # :param int track_grad_norm: -1 no tracking. 
Otherwise tracks that norm + # :param int check_val_every_n_epoch: check val every n train epochs + # :param bool fast_dev_run: runs full iteration over everything to find bugs + # :param int accumulate_grad_batches: Accumulates grads every k batches + # :param int max_epochs: + # :param int min_epochs: + # :param int train_percent_check: How much of train set to check + # :param int val_percent_check: How much of val set to check + # :param int test_percent_check: How much of test set to check + # :param float|int val_check_interval: If float, % of tng epoch. If int, check every n batch + # :param int log_save_interval: Writes logs to disk this often + # :param int row_log_interval: How often to add logging rows + # :param int add_row_log_interval: How often to add logging rows. Deprecated. + # :param str distributed_backend: Options: 'dp', 'ddp', 'ddp2'. + # :param bool use_amp: If true uses apex for 16bit precision + # :param bool print_nan_grads: Prints nan gradients + # :param str weights_summary: Options: 'full', 'top', None to not print. + # :param bool weights_save_path: Where to save weights if on cluster + # :param str amp_level: Check nvidia docs for level + # :param int num_sanity_val_steps: How many val steps before a full train loop. + # :param int truncated_bptt_steps: Enables multiple backward passes for each batch. + # + # .. warning:: Following arguments become deprecated and they will be removed in v0.8.0: + # - `gradient_clip`, + # - `nb_gpu_nodes`, + # - `max_nb_epochs`, + # - `min_nb_epochs`, + # - `add_row_log_interval`, + # - `nb_sanity_val_steps` - :param logger: Logger for experiment tracking - :param checkpoint_callback: Callback for checkpointing - :param early_stop_callback: Callback for early stopping - :param str default_save_path: Default path for logs+weights if no logger/ckpt_callback passed - :param int gradient_clip_val: 0 means don't clip. - :param int gradient_clip: 0 means don't clip. Deprecated. - :param process_position: shown in the tqdm bar - :param int num_nodes: number of GPU nodes - :param list|str|int gpus: int. (ie: 2 gpus) OR list to specify which GPUs [0, 1] OR '0,1' - OR '-1' / -1 to use all available gpus - :param str log_gpu_memory: None, 'min_max', 'all' - :param bool show_progress_bar: If true shows tqdm bar - :param float overfit_pct: uses this much of all datasets - :param int track_grad_norm: -1 no tracking. Otherwise tracks that norm - :param int check_val_every_n_epoch: check val every n train epochs - :param bool fast_dev_run: runs full iteration over everything to find bugs - :param int accumulate_grad_batches: Accumulates grads every k batches - :param int max_epochs: - :param int min_epochs: - :param int train_percent_check: How much of train set to check - :param int val_percent_check: How much of val set to check - :param int test_percent_check: How much of test set to check - :param float|int val_check_interval: If float, % of tng epoch. If int, check every n batch - :param int log_save_interval: Writes logs to disk this often - :param int row_log_interval: How often to add logging rows - :param int add_row_log_interval: How often to add logging rows. Deprecated. - :param str distributed_backend: Options: 'dp', 'ddp', 'ddp2'. - :param bool use_amp: If true uses apex for 16bit precision - :param bool print_nan_grads: Prints nan gradients - :param str weights_summary: Options: 'full', 'top', None to not print. 
- :param bool weights_save_path: Where to save weights if on cluster - :param str amp_level: Check nvidia docs for level - :param int num_sanity_val_steps: How many val steps before a full train loop. - :param int truncated_bptt_steps: Enables multiple backward passes for each batch. - - .. warning:: Following arguments become deprecated and they will be removed in v0.8.0: - - `gradient_clip`, - - `nb_gpu_nodes`, - - `max_nb_epochs`, - - `min_nb_epochs`, - - `add_row_log_interval`, - - `nb_sanity_val_steps` - - """ # Transfer params # Backward compatibility if nb_gpu_nodes is not None: From 3e5c5f9588700fd0c6b2ef8f6507913978eb2eb2 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 07:25:16 -0500 Subject: [PATCH 029/155] added direct links to docs --- pytorch_lightning/trainer/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 30458845fb68f..9ce668eae7be6 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -18,3 +18,7 @@ This pattern de-couples the engineering from the science which makes your code reusable and free to run on any hardware. """ + +from .trainer import Trainer + +__all__ = ['Trainer'] From d2474313007e71a736ba0e646b702151e110b0dc Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 07:45:36 -0500 Subject: [PATCH 030/155] finished rebase --- pytorch_lightning/trainer/__init__.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 9ce668eae7be6..893b5cbe1d2ca 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -1,6 +1,17 @@ """ - It calls parts of your model when it wants to hand over full control and otherwise makes - training assumptions which are now standard practice in AI research. + +The trainer de-couples the engineering code (16-bit, early stopping, GPU distribution, etc...) from the +science code (GAN, BERT, your project, etc...). It uses many assumptions which are best practices in +AI research today. + +The trainer automates all parts of training except: + +- what happens in training , test, val loop +- where the data come from +- which optimizers to use +- how to do the computations + +The Trainer delegates those calls to your LightningModule which defines how to do those parts. This is the basic use of the trainer: @@ -8,15 +19,10 @@ from pytorch_lightning import Trainer - model = LightningTemplate() + model = MyLightningModule() trainer = Trainer() trainer.fit(model) - -The Trainer holds all the engineering code you might need such as distributing over GPUs or early stopping. -The LightningTemplate holds the core computations, train, val, test loop, optimizer and dataloaders. - -This pattern de-couples the engineering from the science which makes your code reusable and free to run on any hardware. 
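A bare-bones sketch of that split: the pieces below (training step, optimizer, data) are the science code a user writes, everything else is handled by the Trainer. The class name, tensor shapes and hyperparameters are placeholders::

    import torch
    from torch.nn import functional as F
    from torch.utils.data import DataLoader, TensorDataset
    import pytorch_lightning as pl

    class TinySystem(pl.LightningModule):
        def __init__(self):
            super(TinySystem, self).__init__()
            self.layer = torch.nn.Linear(8, 1)

        def forward(self, x):
            return self.layer(x)

        def training_step(self, batch, batch_idx):       # what happens in the train loop
            x, y = batch
            return {'loss': F.mse_loss(self.forward(x), y)}

        def configure_optimizers(self):                  # which optimizers to use
            return torch.optim.Adam(self.parameters(), lr=0.02)

        @pl.data_loader
        def train_dataloader(self):                      # where the data come from
            data = TensorDataset(torch.randn(64, 8), torch.randn(64, 1))
            return DataLoader(data, batch_size=16)

    # the engineering code (loops, checkpointing, device placement) stays in the Trainer
    trainer = pl.Trainer(max_epochs=1)
    trainer.fit(TinySystem())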
""" from .trainer import Trainer From 145ea2713de932c4e4d7d382dca94779bced350c Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 08:06:12 -0500 Subject: [PATCH 031/155] making private members --- pytorch_lightning/trainer/trainer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 24b6d8880ccbe..a55ac84c29363 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -416,7 +416,7 @@ def fit(self, model): # CHOOSE OPTIMIZER # allow for lr schedulers as well - self.optimizers, self.lr_schedulers = self.init_optimizers(model.configure_optimizers()) + self.optimizers, self.lr_schedulers = self.__init_optimizers(model.configure_optimizers()) self.run_pretrain_routine(model) @@ -424,7 +424,7 @@ def fit(self, model): # used for testing or when we need to know that training succeeded return 1 - def init_optimizers(self, optimizers): + def __init_optimizers(self, optimizers): # single optimizer if isinstance(optimizers, Optimizer): @@ -433,14 +433,14 @@ def init_optimizers(self, optimizers): # two lists elif len(optimizers) == 2 and isinstance(optimizers[0], list): optimizers, lr_schedulers = optimizers - lr_schedulers, self.reduce_lr_on_plateau_scheduler = self.configure_schedulers(lr_schedulers) + lr_schedulers, self.reduce_lr_on_plateau_scheduler = self.__configure_schedulers(lr_schedulers) return optimizers, lr_schedulers # single list or tuple elif isinstance(optimizers, list) or isinstance(optimizers, tuple): return optimizers, [] - def configure_schedulers(self, schedulers): + def __configure_schedulers(self, schedulers): for i, scheduler in enumerate(schedulers): if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau): reduce_lr_on_plateau_scheduler = schedulers.pop(i) From f80d24c188720f865f09a97c9308a4c5edfbb65f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 08:11:04 -0500 Subject: [PATCH 032/155] making private members --- pytorch_lightning/trainer/trainer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index a55ac84c29363..24b6d8880ccbe 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -416,7 +416,7 @@ def fit(self, model): # CHOOSE OPTIMIZER # allow for lr schedulers as well - self.optimizers, self.lr_schedulers = self.__init_optimizers(model.configure_optimizers()) + self.optimizers, self.lr_schedulers = self.init_optimizers(model.configure_optimizers()) self.run_pretrain_routine(model) @@ -424,7 +424,7 @@ def fit(self, model): # used for testing or when we need to know that training succeeded return 1 - def __init_optimizers(self, optimizers): + def init_optimizers(self, optimizers): # single optimizer if isinstance(optimizers, Optimizer): @@ -433,14 +433,14 @@ def __init_optimizers(self, optimizers): # two lists elif len(optimizers) == 2 and isinstance(optimizers[0], list): optimizers, lr_schedulers = optimizers - lr_schedulers, self.reduce_lr_on_plateau_scheduler = self.__configure_schedulers(lr_schedulers) + lr_schedulers, self.reduce_lr_on_plateau_scheduler = self.configure_schedulers(lr_schedulers) return optimizers, lr_schedulers # single list or tuple elif isinstance(optimizers, list) or isinstance(optimizers, tuple): return optimizers, [] - def __configure_schedulers(self, schedulers): + def configure_schedulers(self, schedulers): 
for i, scheduler in enumerate(schedulers): if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau): reduce_lr_on_plateau_scheduler = schedulers.pop(i) From f700912f722d2c303efff2a136183bba66b29b6d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 08:26:53 -0500 Subject: [PATCH 033/155] making private members --- pytorch_lightning/trainer/trainer.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 24b6d8880ccbe..38d2400798857 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -48,11 +48,6 @@ class Trainer(TrainerIOMixin, TrainerTrainLoopMixin, TrainerCallbackConfigMixin, ): - r""" - Main trainer class - - """ - def __init__( self, logger=True, @@ -95,18 +90,21 @@ def __init__( resume_from_checkpoint=None, ): r""" - + Customize every aspect of training via flags + + Args: + logger (:class:`.Logger`): Logger for experiment tracking + example: + >>> asd + checkpoint_callback (:class:`CheckpointCallback`): Callback for checkpointing + early_stop_callback (:class:`.EarlyStopping`): Callback for early stopping + default_save_path (str): Default path for logs+weights if no logger/ckpt_callback passed + gradient_clip_val (int): 0 means don't clip. + gradient_clip (int): 0 means don't clip. Deprecated. + process_position (int): shown in the tqdm bar + num_nodes (int): number of GPU nodes """ - # Args: - # logger (ref:`Logger`): Logger for experiment tracking - # checkpoint_callback (ref:`Callback`): Callback for checkpointing - # :param early_stop_callback: Callback for early stopping - # :param str default_save_path: Default path for logs+weights if no logger/ckpt_callback passed - # :param int gradient_clip_val: 0 means don't clip. - # :param int gradient_clip: 0 means don't clip. Deprecated. - # :param process_position: shown in the tqdm bar - # :param int num_nodes: number of GPU nodes # :param list|str|int gpus: int. (ie: 2 gpus) OR list to specify which GPUs [0, 1] OR '0,1' # OR '-1' / -1 to use all available gpus # :param str log_gpu_memory: None, 'min_max', 'all' From 365558c824037373d35a04f3e8ec3794f8a8448d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 09:26:35 -0500 Subject: [PATCH 034/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 64 ++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 38d2400798857..a424f3f4c8dad 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -94,15 +94,63 @@ def __init__( Customize every aspect of training via flags Args: - logger (:class:`.Logger`): Logger for experiment tracking - example: - >>> asd - checkpoint_callback (:class:`CheckpointCallback`): Callback for checkpointing + logger (:class:`.Logger`): Logger for experiment tracking. + Example:: + from pytorch_lightning.logging import TensorBoardLogger + + # default logger used by trainer + logger = TensorBoardLogger( + save_dir=os.getcwd(), + version=self.slurm_job_id, + name='lightning_logs' + ) + + Trainer(logger=logger) + checkpoint_callback (:class:`CheckpointCallback`): Callback for checkpointing. 
+ Example:: + from pytorch_lightning.callbacks import ModelCheckpoint + + # default used by the Trainer + checkpoint_callback = ModelCheckpoint( + filepath=os.getcwd(), + save_best_only=True, + verbose=True, + monitor='val_loss', + mode='min', + prefix='' + ) + + trainer = Trainer(checkpoint_callback=checkpoint_callback) early_stop_callback (:class:`.EarlyStopping`): Callback for early stopping - default_save_path (str): Default path for logs+weights if no logger/ckpt_callback passed - gradient_clip_val (int): 0 means don't clip. - gradient_clip (int): 0 means don't clip. Deprecated. - process_position (int): shown in the tqdm bar + Example:: + from pytorch_lightning.callbacks import EarlyStopping + + # default used by the Trainer + early_stop_callback = EarlyStopping( + monitor='val_loss', + patience=3, + verbose=True, + mode='min' + ) + + trainer = Trainer(early_stop_callback=early_stop_callback) + default_save_path (str): Default path for logs and weights when no logger/ckpt_callback passed + Example:: + # default used by the Trainer + trainer = Trainer(default_save_path=os.getcwd()) + gradient_clip_val (float): 0 means don't clip. + Example:: + # default used by the Trainer + trainer = Trainer(gradient_clip_val=0.0) + gradient_clip (int): + .. deprecated:: 0.5.0 + Use `gradient_clip_val` instead. + + process_position (int): orders the tqdm bar when running multiple models on same machine. + Example:: + # default used by the Trainer + trainer = Trainer(process_position=0) + num_nodes (int): number of GPU nodes """ # :param list|str|int gpus: int. (ie: 2 gpus) OR list to specify which GPUs [0, 1] OR '0,1' From 87d9c21eb71545dab3f3004b6a7dd9ebfcefe709 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 09:50:13 -0500 Subject: [PATCH 035/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 113 +++++++++++++++++++++++---- 1 file changed, 99 insertions(+), 14 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index a424f3f4c8dad..97479c47aa258 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -144,26 +144,111 @@ def __init__( trainer = Trainer(gradient_clip_val=0.0) gradient_clip (int): .. deprecated:: 0.5.0 - Use `gradient_clip_val` instead. + Use `gradient_clip_val` instead. Will remove 0.8.0. process_position (int): orders the tqdm bar when running multiple models on same machine. Example:: # default used by the Trainer trainer = Trainer(process_position=0) - - num_nodes (int): number of GPU nodes + + num_nodes (int): number of GPU nodes for distributed training. + Example:: + # default used by the Trainer + trainer = Trainer(num_nodes=1) + + # to train on 8 nodes + trainer = Trainer(num_nodes=8) + gpus (list|str|int): Which GPUs to train on. + Example:: + # default used by the Trainer (ie: train on CPU) + trainer = Trainer(gpus=None) + + # int: train on 2 gpus + trainer = Trainer(gpus=2) + + # list: train on GPUs 1, 4 (by bus ordering) + trainer = Trainer(gpus=[1, 4]) + trainer = Trainer(gpus='1, 4') # equivalent + + # -1: train on all gpus + trainer = Trainer(gpus=-1) + trainer = Trainer(gpus='-1') # equivalent + + # combine with num_nodes to train on multiple GPUs across nodes + trainer = Trainer(gpus=2, num_nodes=4) # uses 8 gpus in total + + log_gpu_memory (str): None, 'min_max', 'all'. Might slow performance + because it uses the output of nvidia-smi. 
+ Example:: + # default used by the Trainer + trainer = Trainer(log_gpu_memory=None) + + # log all the GPUs (on master node only) + trainer = Trainer(log_gpu_memory='all') + + # log only the min and max memory on the master node + trainer = Trainer(log_gpu_memory='min_max') + + show_progress_bar (bool): If true shows tqdm progress bar + Example:: + # default used by the Trainer + trainer = Trainer(show_progress_bar=True) + + overfit_pct (float): uses this much data of all datasets. + Example:: + # default used by the Trainer + trainer = Trainer(overfit_pct=0.0) + + # use only 1% of the train, test, val datasets + trainer = Trainer(overfit_pct=0.01) + + track_grad_norm (int): -1 no tracking. Otherwise tracks that norm + Example:: + # default used by the Trainer + trainer = Trainer(track_grad_norm=-1) + + # track the 2-norm + trainer = Trainer(track_grad_norm=2) + + check_val_every_n_epoch (int): check val every n train epochs + Example:: + # default used by the Trainer + trainer = Trainer(check_val_every_n_epoch=1) + + # run val loop every 10 training epochs + trainer = Trainer(check_val_every_n_epoch=10) + + fast_dev_run (bool): runs 1 batch of train, test and val to find any bugs (ie: a sort of unit test). + Example:: + # default used by the Trainer + trainer = Trainer(fast_dev_run=False) + + # runs 1 train, val, test batch and program ends + trainer = Trainer(fast_dev_run=True) + + accumulate_grad_batches (int|dict): Accumulates grads every k batches or as set up in the dict. + Example:: + # default used by the Trainer (no accumulation) + trainer = Trainer(accumulate_grad_batches=1) + + # accumulate every 4 batches (effective batch size is batch*4) + trainer = Trainer(accumulate_grad_batches=4) + + # no accumulation for epochs 1-4. accumulate 3 for epochs 5-10. accumulate 20 after that + trainer = Trainer(accumulate_grad_batches={5: 3, 10: 20}) + + max_epochs (int): Stop training once this number of epochs is reached + Example:: + # default used by the Trainer + trainer = Trainer(max_epochs=1000) + + min_epochs (int): Force training for at least these many epochs + Example:: + # default used by the Trainer + trainer = Trainer(min_epochs=1) + + """ - # :param list|str|int gpus: int. (ie: 2 gpus) OR list to specify which GPUs [0, 1] OR '0,1' - # OR '-1' / -1 to use all available gpus - # :param str log_gpu_memory: None, 'min_max', 'all' - # :param bool show_progress_bar: If true shows tqdm bar - # :param float overfit_pct: uses this much of all datasets - # :param int track_grad_norm: -1 no tracking. 
Otherwise tracks that norm - # :param int check_val_every_n_epoch: check val every n train epochs - # :param bool fast_dev_run: runs full iteration over everything to find bugs - # :param int accumulate_grad_batches: Accumulates grads every k batches - # :param int max_epochs: - # :param int min_epochs: # :param int train_percent_check: How much of train set to check # :param int val_percent_check: How much of val set to check # :param int test_percent_check: How much of test set to check From 8883f031acfe4a016079f764777e03157c291741 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 09:55:55 -0500 Subject: [PATCH 036/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 29 +++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 97479c47aa258..aa0adfa398f9e 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -247,11 +247,34 @@ def __init__( # default used by the Trainer trainer = Trainer(min_epochs=1) + train_percent_check (int): How much of training dataset to check. + Useful when debugging or testing something that happens at the end of an epoch. + Example:: + # default used by the Trainer + trainer = Trainer(train_percent_check=1.0) + + # run through only 25% of the training set each epoch + trainer = Trainer(train_percent_check=0.25) + + val_percent_check (int): How much of validation dataset to check. + Useful when debugging or testing something that happens at the end of an epoch. + Example:: + # default used by the Trainer + trainer = Trainer(val_percent_check=1.0) + + # run through only 25% of the validation set each epoch + trainer = Trainer(val_percent_check=0.25) + + test_percent_check (int): How much of test dataset to check. + Useful when debugging or testing something that happens at the end of an epoch. + Example:: + # default used by the Trainer + trainer = Trainer(test_percent_check=1.0) + + # run through only 25% of the test set each epoch + trainer = Trainer(test_percent_check=0.25) """ - # :param int train_percent_check: How much of train set to check - # :param int val_percent_check: How much of val set to check - # :param int test_percent_check: How much of test set to check # :param float|int val_check_interval: If float, % of tng epoch. If int, check every n batch # :param int log_save_interval: Writes logs to disk this often # :param int row_log_interval: How often to add logging rows From 80454d3635922a5f646c4a0547fbc2b4e2c57c0a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:13:48 -0500 Subject: [PATCH 037/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 94 +++++++++++++++++++++++++--- 1 file changed, 84 insertions(+), 10 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index aa0adfa398f9e..74faa1bc74b80 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -274,17 +274,91 @@ def __init__( # run through only 25% of the test set each epoch trainer = Trainer(test_percent_check=0.25) - """ - # :param float|int val_check_interval: If float, % of tng epoch. If int, check every n batch - # :param int log_save_interval: Writes logs to disk this often - # :param int row_log_interval: How often to add logging rows - # :param int add_row_log_interval: How often to add logging rows. Deprecated. 
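As a rough recipe, the dataset-fraction and logging flags documented above are often combined for a quick smoke test of the whole pipeline; the 1% fractions below are arbitrary::

    from pytorch_lightning import Trainer

    # touch only a tiny slice of each dataset while debugging
    trainer = Trainer(
        train_percent_check=0.01,
        val_percent_check=0.01,
        test_percent_check=0.01,
        row_log_interval=10,      # add a row of metrics every 10 batches
        log_save_interval=100,    # write the accumulated logs to disk this often
    )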
- # :param str distributed_backend: Options: 'dp', 'ddp', 'ddp2'. - # :param bool use_amp: If true uses apex for 16bit precision - # :param bool print_nan_grads: Prints nan gradients - # :param str weights_summary: Options: 'full', 'top', None to not print. - # :param bool weights_save_path: Where to save weights if on cluster + val_check_interval (float|int): How often within one training epoch to check the validation set + If float, % of tng epoch. If int, check every n batch + Example:: + # default used by the Trainer + trainer = Trainer(val_check_interval=1.0) + + # check validation set 4 times during a training epoch + trainer = Trainer(val_check_interval=0.25) + + # check validation set every 1000 training batches + # use this when using iterableDataset and your dataset has no length + # (ie: production cases with streaming data) + trainer = Trainer(val_check_interval=1000) + + log_save_interval (int): Writes logs to disk this often + Example:: + # default used by the Trainer + trainer = Trainer(log_save_interval=100) + + row_log_interval (int): How often to add logging rows (does not write to disk) + Example:: + # default used by the Trainer + trainer = Trainer(row_log_interval=10) + + add_row_log_interval (int): + .. deprecated:: 0.5.0 + Use `row_log_interval` instead. Will remove 0.8.0. + + distributed_backend (str): The distributed backend to use. + Options: 'dp', 'ddp', 'ddp2'. + Example:: + # default used by the Trainer + trainer = Trainer(distributed_backend=None) + + # dp = DataParallel (split a batch onto k gpus on same machine). + trainer = Trainer(gpus=2, distributed_backend='dp') + + # ddp = DistributedDataParallel + # Each gpu trains by itself on a subset of the data. + # Gradients sync across all gpus and all machines. + trainer = Trainer(gpus=2, num_nodes=2, distributed_backend='ddp') + + # ddp2 = DistributedDataParallel + dp + # behaves like dp on every node + # syncs gradients across nodes like ddp + # useful for things like increasing the number of negative samples + trainer = Trainer(gpus=2, num_nodes=2, distributed_backend='ddp2') + + use_amp (bool): If true uses apex for 16bit precision + Example:: + # default used by the Trainer + trainer = Trainer(use_amp=False) + + print_nan_grads (bool): Prints gradients with nan values + Example:: + # default used by the Trainer + trainer = Trainer(print_nan_grads=False) + + weights_summary (str): Prints a summary of the weights when training begins. + Options: 'full', 'top', None. + Example:: + # default used by the Trainer (ie: print all weights) + trainer = Trainer(weights_summary='full') + + # print only the top level modules + trainer = Trainer(weights_summary='top') + + # don't print a summary + trainer = Trainer(weights_summary=None) + + weights_save_path (str): Where to save weights if specified. + Example:: + # default used by the Trainer + trainer = Trainer(weights_save_path=os.getcwd()) + + # save to your custom path + trainer = Trainer(weights_save_path='my/path') + + # if checkpoint callback used, then overrides the weights path + # **NOTE: this saves weights to some/path NOT my/path + checkpoint_callback = ModelCheckpoint(filepath='some/path') + trainer = Trainer(checkpoint_callback=checkpoint_callback, weights_save_path='my/path') + # :param str amp_level: Check nvidia docs for level + """ # :param int num_sanity_val_steps: How many val steps before a full train loop. # :param int truncated_bptt_steps: Enables multiple backward passes for each batch. 
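Pulling the distributed and 16-bit flags above together, a sketch of a 2-node job with 2 GPUs per node; the node and GPU counts are placeholders for whatever the cluster provides::

    from pytorch_lightning import Trainer

    trainer = Trainer(
        gpus=2,                     # 2 GPUs per node
        num_nodes=2,                # 4 GPUs in total
        distributed_backend='ddp',  # one process per GPU, gradients synced across all of them
        use_amp=True,               # 16-bit precision via apex
        amp_level='O1',             # see the nvidia apex docs for the available levels
    )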
# From 1b12cb83dcce889c01454e60fdfec35a28a3eaaf Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:14:29 -0500 Subject: [PATCH 038/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 74faa1bc74b80..b9c0f55b60496 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -355,7 +355,10 @@ def __init__( # if checkpoint callback used, then overrides the weights path # **NOTE: this saves weights to some/path NOT my/path checkpoint_callback = ModelCheckpoint(filepath='some/path') - trainer = Trainer(checkpoint_callback=checkpoint_callback, weights_save_path='my/path') + trainer = Trainer( + checkpoint_callback=checkpoint_callback, + weights_save_path='my/path' + ) # :param str amp_level: Check nvidia docs for level """ From 0b0f2c01e1b623abf811465d265efa221d785710 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:16:17 -0500 Subject: [PATCH 039/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b9c0f55b60496..add1c849765ca 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -360,7 +360,11 @@ def __init__( weights_save_path='my/path' ) - # :param str amp_level: Check nvidia docs for level + amp_level (str): The optimization level to use (O1, O2, etc...). + Check nvidia docs for level (https://nvidia.github.io/apex/amp.html#opt-levels) + Example:: + # default used by the Trainer + trainer = Trainer(amp_level='O1') """ # :param int num_sanity_val_steps: How many val steps before a full train loop. # :param int truncated_bptt_steps: Enables multiple backward passes for each batch. From cebcd3039cd6c9f6c007a8e280261be865f01fd3 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:25:51 -0500 Subject: [PATCH 040/155] set auto dp if no backend --- pytorch_lightning/trainer/trainer.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index add1c849765ca..a56aa2ae19dc4 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -359,15 +359,37 @@ def __init__( checkpoint_callback=checkpoint_callback, weights_save_path='my/path' ) - + amp_level (str): The optimization level to use (O1, O2, etc...). Check nvidia docs for level (https://nvidia.github.io/apex/amp.html#opt-levels) Example:: # default used by the Trainer trainer = Trainer(amp_level='O1') + + num_sanity_val_steps (int): Sanity check runs n batches of val before starting the training routine. + This catches any bugs in your validation without having to wait for the first validation check. + The Trainer uses 5 steps by default. Turn it off or modify it here. + Example:: + # default used by the Trainer + trainer = Trainer(num_sanity_val_steps=5) + + # turn it off + trainer = Trainer(num_sanity_val_steps=0) + + truncated_bptt_steps (int): Truncated back prop breaks performs backprop every k steps of a much longer sequence + If this is enabled, your batches will automatically get truncated + and the trainer will apply Truncated Backprop to it. Make sure your batches have a sequence dimension. 
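For intuition only (plain tensor slicing, not the Trainer API), truncating a long sequence batch every `truncated_bptt_steps` time steps looks roughly like this::

    import torch

    batch = torch.randn(32, 100, 8)   # (batch, time, features)
    truncated_bptt_steps = 5

    # the trainer hands the training step one chunk like these at a time,
    # backpropagating after each chunk instead of once over all 100 steps
    chunks = torch.split(batch, truncated_bptt_steps, dim=1)
    assert len(chunks) == 20
    assert chunks[0].shape == (32, 5, 8)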
+ + `Williams, Ronald J., and Jing Peng. "An efficient gradient-based algorithm for on-line training of recurrent network trajectories." + `_ + + Example:: + # default used by the Trainer (ie: disabled) + trainer = Trainer(truncated_bptt_steps=None) + + # backprop every 5 steps in a batch + trainer = Trainer(truncated_bptt_steps=5) """ - # :param int num_sanity_val_steps: How many val steps before a full train loop. - # :param int truncated_bptt_steps: Enables multiple backward passes for each batch. # # .. warning:: Following arguments become deprecated and they will be removed in v0.8.0: # - `gradient_clip`, From 59e22a9225886c1e08ee35b023095ed8cf2a7bf2 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:32:41 -0500 Subject: [PATCH 041/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 30 +++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index a56aa2ae19dc4..b071cbe8d5c6b 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -158,6 +158,11 @@ def __init__( # to train on 8 nodes trainer = Trainer(num_nodes=8) + + nb_gpu_nodes (int): + .. deprecated:: 0.5.0 + Use `num_nodes` instead. Will remove 0.8.0. + gpus (list|str|int): Which GPUs to train on. Example:: # default used by the Trainer (ie: train on CPU) @@ -241,11 +246,19 @@ def __init__( Example:: # default used by the Trainer trainer = Trainer(max_epochs=1000) + + max_nb_epochs (int): + .. deprecated:: 0.5.0 + Use `max_epochs` instead. Will remove 0.8.0. min_epochs (int): Force training for at least these many epochs Example:: # default used by the Trainer trainer = Trainer(min_epochs=1) + + min_nb_epochs (int): + .. deprecated:: 0.5.0 + Use `min_nb_epochs` instead. Will remove 0.8.0. train_percent_check (int): How much of training dataset to check. Useful when debugging or testing something that happens at the end of an epoch. @@ -376,6 +389,10 @@ def __init__( # turn it off trainer = Trainer(num_sanity_val_steps=0) + nb_sanity_val_steps (int): + .. deprecated:: 0.5.0 + Use `num_sanity_val_steps` instead. Will remove 0.8.0. + truncated_bptt_steps (int): Truncated back prop breaks performs backprop every k steps of a much longer sequence If this is enabled, your batches will automatically get truncated and the trainer will apply Truncated Backprop to it. Make sure your batches have a sequence dimension. @@ -389,14 +406,17 @@ def __init__( # backprop every 5 steps in a batch trainer = Trainer(truncated_bptt_steps=5) + + resume_from_checkpoint (str): To resume training from a specific checkpoint pass in the path here.k + Example:: + # default used by the Trainer + trainer = Trainer(resume_from_checkpoint=None) + + # resume from a specific checkpoint + trainer = Trainer(resume_from_checkpoint='some/path/to/my_checkpoint.ckpt') """ # # .. 
warning:: Following arguments become deprecated and they will be removed in v0.8.0: - # - `gradient_clip`, - # - `nb_gpu_nodes`, - # - `max_nb_epochs`, - # - `min_nb_epochs`, - # - `add_row_log_interval`, # - `nb_sanity_val_steps` # Transfer params From 19e13e776b30c602fd0c82cab2b885255cb376de Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:33:19 -0500 Subject: [PATCH 042/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b071cbe8d5c6b..866ccd3b3dedc 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -403,15 +403,15 @@ def __init__( Example:: # default used by the Trainer (ie: disabled) trainer = Trainer(truncated_bptt_steps=None) - + # backprop every 5 steps in a batch trainer = Trainer(truncated_bptt_steps=5) - + resume_from_checkpoint (str): To resume training from a specific checkpoint pass in the path here.k Example:: # default used by the Trainer trainer = Trainer(resume_from_checkpoint=None) - + # resume from a specific checkpoint trainer = Trainer(resume_from_checkpoint='some/path/to/my_checkpoint.ckpt') """ From 62d213ab3c3106ac35a44926da4c3cd0a76c446b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:40:16 -0500 Subject: [PATCH 043/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 866ccd3b3dedc..7bbada8cc5455 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -665,6 +665,16 @@ def tng_tqdm_dic(self): # MODEL TRAINING # ----------------------------- def fit(self, model): + r""" + Runs the full optimization routine. + + Example:: + + trainer = Trainer() + model = LightningModule() + + trainer.fit() + """ # when using multi-node or DDP within a node start each module in a separate process if self.use_ddp2: task = int(os.environ['SLURM_LOCALID']) @@ -820,6 +830,30 @@ def run_pretrain_routine(self, model): self.train() def test(self, model=None): + r""" + + Separates from fit to make sure you never run on your test set until you want to. + + Args: + model (LightningModule): The model to test. 
+ + Example:: + + # Option 1: + # run test after fitting + trainer = Trainer() + model = LightningModule() + + trainer.fit() + trainer.test() + + # Option 2: + # run test from a model loaded + model = LightningModule.load_from_checkpoint('path/to/checkpoint.ckpt') + trainer = Trainer() + + trainer.test(model) + """ self.testing = True if model is not None: self.fit(model) From ed91ad6407378257e5bfbd18d50179991cf34713 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:41:05 -0500 Subject: [PATCH 044/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 7bbada8cc5455..ed8b935d69991 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -839,7 +839,7 @@ def test(self, model=None): Example:: - # Option 1: + # Option 1 # run test after fitting trainer = Trainer() model = LightningModule() @@ -847,8 +847,8 @@ def test(self, model=None): trainer.fit() trainer.test() - # Option 2: - # run test from a model loaded + # Option 2 + # run test from a loaded model model = LightningModule.load_from_checkpoint('path/to/checkpoint.ckpt') trainer = Trainer() From 1c7a2204329f70d3ed9c61513b0c5bc8bcd86728 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:47:12 -0500 Subject: [PATCH 045/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index ed8b935d69991..0885d2319225a 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -400,6 +400,7 @@ def __init__( `Williams, Ronald J., and Jing Peng. "An efficient gradient-based algorithm for on-line training of recurrent network trajectories." `_ + Example:: # default used by the Trainer (ie: disabled) trainer = Trainer(truncated_bptt_steps=None) From d05f805b2546c2548c498b84adbe814d253941ab Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:47:39 -0500 Subject: [PATCH 046/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 0885d2319225a..8e932e37cf5bc 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -396,7 +396,6 @@ def __init__( truncated_bptt_steps (int): Truncated back prop breaks performs backprop every k steps of a much longer sequence If this is enabled, your batches will automatically get truncated and the trainer will apply Truncated Backprop to it. Make sure your batches have a sequence dimension. - `Williams, Ronald J., and Jing Peng. "An efficient gradient-based algorithm for on-line training of recurrent network trajectories." 
`_ From f353b021b56b09b5397c6276f36f40d46fe22133 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:51:15 -0500 Subject: [PATCH 047/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 8e932e37cf5bc..565754f416c88 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -396,10 +396,8 @@ def __init__( truncated_bptt_steps (int): Truncated back prop breaks performs backprop every k steps of a much longer sequence If this is enabled, your batches will automatically get truncated and the trainer will apply Truncated Backprop to it. Make sure your batches have a sequence dimension. - `Williams, Ronald J., and Jing Peng. "An efficient gradient-based algorithm for on-line training of recurrent network trajectories." - `_ - - + (`Williams et al. "An efficient gradient-based algorithm for on-line training of recurrent network trajectories." + `_) Example:: # default used by the Trainer (ie: disabled) trainer = Trainer(truncated_bptt_steps=None) From 6a414195fd1f99f285cd355afee6314a20c3f762 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 13:36:48 -0500 Subject: [PATCH 048/155] working on trainer docs --- pytorch_lightning/core/__init__.py | 304 ++++++++++++++-------------- pytorch_lightning/core/lightning.py | 119 +++++------ 2 files changed, 211 insertions(+), 212 deletions(-) diff --git a/pytorch_lightning/core/__init__.py b/pytorch_lightning/core/__init__.py index c2694eabf5758..9b20b562865b5 100644 --- a/pytorch_lightning/core/__init__.py +++ b/pytorch_lightning/core/__init__.py @@ -1,150 +1,158 @@ """ -Lightning Module interface -========================== - -A lightning module is a strict superclass of nn.Module, it provides a standard interface - for the trainer to interact with the model. - -The easiest thing to do is copy the minimal example below and modify accordingly. - -Otherwise, to Define a Lightning Module, implement the following methods: - - -Minimal example ---------------- - -.. code-block:: python - - import os - import torch - from torch.nn import functional as F - from torch.utils.data import DataLoader - from torchvision.datasets import MNIST - import torchvision.transforms as transforms - - import pytorch_lightning as pl - - class CoolModel(pl.LightningModule): - - def __init__(self): - super(CoolModel, self).__init__() - # not the best model... 
- self.l1 = torch.nn.Linear(28 * 28, 10) - - def forward(self, x): - return torch.relu(self.l1(x.view(x.size(0), -1))) - - def training_step(self, batch, batch_idx): - # REQUIRED - x, y = batch - y_hat = self.forward(x) - return {'loss': F.cross_entropy(y_hat, y)} - - def validation_step(self, batch, batch_idx): - # OPTIONAL - x, y = batch - y_hat = self.forward(x) - return {'val_loss': F.cross_entropy(y_hat, y)} - - def validation_end(self, outputs): - # OPTIONAL - val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() - return {'val_loss': val_loss_mean} - - def test_step(self, batch, batch_idx): - # OPTIONAL - x, y = batch - y_hat = self.forward(x) - return {'test_loss': F.cross_entropy(y_hat, y)} - - def test_end(self, outputs): - # OPTIONAL - test_loss_mean = torch.stack([x['test_loss'] for x in outputs]).mean() - return {'test_loss': test_loss_mean} - - def configure_optimizers(self): - # REQUIRED - return torch.optim.Adam(self.parameters(), lr=0.02) - - @pl.data_loader - def train_dataloader(self): - return DataLoader(MNIST(os.getcwd(), train=True, download=True, - transform=transforms.ToTensor()), batch_size=32) - - @pl.data_loader - def val_dataloader(self): - # OPTIONAL - # can also return a list of val dataloaders - return DataLoader(MNIST(os.getcwd(), train=True, download=True, - transform=transforms.ToTensor()), batch_size=32) - - @pl.data_loader - def test_dataloader(self): - # OPTIONAL - # can also return a list of test dataloaders - return DataLoader(MNIST(os.getcwd(), train=False, download=True, - transform=transforms.ToTensor()), batch_size=32) - - -How do these methods fit into the broader training? ---------------------------------------------------- - -The LightningModule interface is on the right. Each method corresponds - to a part of a research project. Lightning automates everything not in blue. - -.. figure:: docs/source/_static/images/overview_flat.jpg - :align: center - - Overview. - - -Optional Methods ----------------- - -**add_model_specific_args** - -.. code-block:: python - - @staticmethod - def add_model_specific_args(parent_parser, root_dir) - -Lightning has a list of default argparse commands. - This method is your chance to add or modify commands specific to your model. - The `hyperparameter argument parser - `_ - is available anywhere in your model by calling self.hparams. - -**Return** -An argument parser - -**Example** - -.. 
code-block:: python - - @staticmethod - def add_model_specific_args(parent_parser, root_dir): - parser = HyperOptArgumentParser(strategy=parent_parser.strategy, parents=[parent_parser]) - - # param overwrites - # parser.set_defaults(gradient_clip_val=5.0) - - # network params - parser.opt_list('--drop_prob', default=0.2, options=[0.2, 0.5], type=float, tunable=False) - parser.add_argument('--in_features', default=28*28) - parser.add_argument('--out_features', default=10) - # use 500 for CPU, 50000 for GPU to see speed difference - parser.add_argument('--hidden_dim', default=50000) - - # data - parser.add_argument('--data_root', default=os.path.join(root_dir, 'mnist'), type=str) - - # training params (opt) - parser.opt_list('--learning_rate', default=0.001, type=float, - options=[0.0001, 0.0005, 0.001, 0.005], tunable=False) - parser.opt_list('--batch_size', default=256, type=int, - options=[32, 64, 128, 256], tunable=False) - parser.opt_list('--optimizer_name', default='adam', type=str, - options=['adam'], tunable=False) - return parser - +Test """ +# """ +# Lightning Module interface +# ========================== +# +# A lightning module is a strict superclass of nn.Module, it provides a standard interface +# for the trainer to interact with the model. +# +# The easiest thing to do is copy the minimal example below and modify accordingly. +# +# Otherwise, to Define a Lightning Module, implement the following methods: +# +# +# Minimal example +# --------------- +# +# .. code-block:: python +# +# import os +# import torch +# from torch.nn import functional as F +# from torch.utils.data import DataLoader +# from torchvision.datasets import MNIST +# import torchvision.transforms as transforms +# +# import pytorch_lightning as pl +# +# class CoolModel(pl.LightningModule): +# +# def __init__(self): +# super(CoolModel, self).__init__() +# # not the best model... 
+# self.l1 = torch.nn.Linear(28 * 28, 10) +# +# def forward(self, x): +# return torch.relu(self.l1(x.view(x.size(0), -1))) +# +# def training_step(self, batch, batch_idx): +# # REQUIRED +# x, y = batch +# y_hat = self.forward(x) +# return {'loss': F.cross_entropy(y_hat, y)} +# +# def validation_step(self, batch, batch_idx): +# # OPTIONAL +# x, y = batch +# y_hat = self.forward(x) +# return {'val_loss': F.cross_entropy(y_hat, y)} +# +# def validation_end(self, outputs): +# # OPTIONAL +# val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() +# return {'val_loss': val_loss_mean} +# +# def test_step(self, batch, batch_idx): +# # OPTIONAL +# x, y = batch +# y_hat = self.forward(x) +# return {'test_loss': F.cross_entropy(y_hat, y)} +# +# def test_end(self, outputs): +# # OPTIONAL +# test_loss_mean = torch.stack([x['test_loss'] for x in outputs]).mean() +# return {'test_loss': test_loss_mean} +# +# def configure_optimizers(self): +# # REQUIRED +# return torch.optim.Adam(self.parameters(), lr=0.02) +# +# @pl.data_loader +# def train_dataloader(self): +# return DataLoader(MNIST(os.getcwd(), train=True, download=True, +# transform=transforms.ToTensor()), batch_size=32) +# +# @pl.data_loader +# def val_dataloader(self): +# # OPTIONAL +# # can also return a list of val dataloaders +# return DataLoader(MNIST(os.getcwd(), train=True, download=True, +# transform=transforms.ToTensor()), batch_size=32) +# +# @pl.data_loader +# def test_dataloader(self): +# # OPTIONAL +# # can also return a list of test dataloaders +# return DataLoader(MNIST(os.getcwd(), train=False, download=True, +# transform=transforms.ToTensor()), batch_size=32) +# +# +# How do these methods fit into the broader training? +# --------------------------------------------------- +# +# The LightningModule interface is on the right. Each method corresponds +# to a part of a research project. Lightning automates everything not in blue. +# +# .. figure:: docs/source/_static/images/overview_flat.jpg +# :align: center +# +# Overview. +# +# +# Optional Methods +# ---------------- +# +# **add_model_specific_args** +# +# .. code-block:: python +# +# @staticmethod +# def add_model_specific_args(parent_parser, root_dir) +# +# Lightning has a list of default argparse commands. +# This method is your chance to add or modify commands specific to your model. +# The `hyperparameter argument parser +# `_ +# is available anywhere in your model by calling self.hparams. +# +# **Return** +# An argument parser +# +# **Example** +# +# .. 
code-block:: python +# +# @staticmethod +# def add_model_specific_args(parent_parser, root_dir): +# parser = HyperOptArgumentParser(strategy=parent_parser.strategy, parents=[parent_parser]) +# +# # param overwrites +# # parser.set_defaults(gradient_clip_val=5.0) +# +# # network params +# parser.opt_list('--drop_prob', default=0.2, options=[0.2, 0.5], type=float, tunable=False) +# parser.add_argument('--in_features', default=28*28) +# parser.add_argument('--out_features', default=10) +# # use 500 for CPU, 50000 for GPU to see speed difference +# parser.add_argument('--hidden_dim', default=50000) +# +# # data +# parser.add_argument('--data_root', default=os.path.join(root_dir, 'mnist'), type=str) +# +# # training params (opt) +# parser.opt_list('--learning_rate', default=0.001, type=float, +# options=[0.0001, 0.0005, 0.001, 0.005], tunable=False) +# parser.opt_list('--batch_size', default=256, type=int, +# options=[32, 64, 128, 256], tunable=False) +# parser.opt_list('--optimizer_name', default='adam', type=str, +# options=['adam'], tunable=False) +# return parser +# +# """ + +from .test_b import TestB +from .lightning import LightningModule + +__all__ = ['TestB'] diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 95345aadf7495..fdaba821afce1 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1,13 +1,4 @@ -""" -The LightningModule is the "system recipe." It groups the following in one file: - - computational system definition - - computations done on forward - - training loop - - validation loop - - testing loop - - train, val, test dataloaders - - optimizers -""" + import os import warnings @@ -18,69 +9,69 @@ import torch import torch.distributed as dist - +# from pytorch_lightning.core.decorators import data_loader from pytorch_lightning.core.grads import GradInformation from pytorch_lightning.core.hooks import ModelHooks -from pytorch_lightning.core.memory import ModelSummary from pytorch_lightning.core.saving import ModelIO +from pytorch_lightning.core.memory import ModelSummary from pytorch_lightning.trainer.training_io import load_hparams_from_tags_csv from pytorch_lightning.overrides.data_parallel import LightningDistributedDataParallel class LightningModule(ABC, GradInformation, ModelIO, ModelHooks): - """ - A LightningModule has the following properties which you can access at any time - - **logger** - A reference to the logger you passed into trainer. - Passing a logger is optional. If you don't pass one in, Lightning will create one - for you automatically. This logger saves logs to `/os.getcwd()/lightning_logs`:: - - Trainer(logger=your_logger) - - - Call it from anywhere in your LightningModule to add metrics, images, etc... - whatever your logger supports. - - Here is an example using the TestTubeLogger (which is a wrapper - on 'PyTorch SummaryWriter `_ - with versioned folder structure). - - .. code-block:: python - - # if logger is a tensorboard logger or TestTubeLogger - self.logger.experiment.add_embedding(...) - self.logger.experiment.log({'val_loss': 0.9}) - self.logger.experiment.add_scalars(...) - - - **trainer** - Last resort access to any state the trainer has. - Changing certain properties here could affect your training run. - - .. code-block:: python - - self.trainer.optimizers - self.trainer.current_epoch - ... - - Debugging - --------- - - The LightningModule also offers these tricks to help debug. 
- - **example_input_array** - - In the LightningModule init, you can set a dummy tensor for this property - to get a print out of sizes coming into and out of every layer. - - .. code-block:: python - - def __init__(self): - # put the dimensions of the first input to your system - self.example_input_array = torch.rand(5, 28 * 28) - """ + # """ + # A LightningModule has the following properties which you can access at any time + # + # **logger** + # A reference to the logger you passed into trainer. + # Passing a logger is optional. If you don't pass one in, Lightning will create one + # for you automatically. This logger saves logs to `/os.getcwd()/lightning_logs`:: + # + # Trainer(logger=your_logger) + # + # + # Call it from anywhere in your LightningModule to add metrics, images, etc... + # whatever your logger supports. + # + # Here is an example using the TestTubeLogger (which is a wrapper + # on 'PyTorch SummaryWriter `_ + # with versioned folder structure). + # + # .. code-block:: python + # + # # if logger is a tensorboard logger or TestTubeLogger + # self.logger.experiment.add_embedding(...) + # self.logger.experiment.log({'val_loss': 0.9}) + # self.logger.experiment.add_scalars(...) + # + # + # **trainer** + # Last resort access to any state the trainer has. + # Changing certain properties here could affect your training run. + # + # .. code-block:: python + # + # self.trainer.optimizers + # self.trainer.current_epoch + # ... + # + # Debugging + # --------- + # + # The LightningModule also offers these tricks to help debug. + # + # **example_input_array** + # + # In the LightningModule init, you can set a dummy tensor for this property + # to get a print out of sizes coming into and out of every layer. + # + # .. code-block:: python + # + # def __init__(self): + # # put the dimensions of the first input to your system + # self.example_input_array = torch.rand(5, 28 * 28) + # """ def __init__(self, *args, **kwargs): super(LightningModule, self).__init__(*args, **kwargs) From 36da61eb012712b32818d84366616d4ffd5de0c0 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 14:51:54 -0500 Subject: [PATCH 049/155] fixed lightning import --- pytorch_lightning/core/__init__.py | 3 +-- pytorch_lightning/core/lightning.py | 31 +++++++++++++++++++++++- pytorch_lightning/trainer/training_io.py | 31 ------------------------ tests/test_trainer.py | 4 +-- 4 files changed, 33 insertions(+), 36 deletions(-) diff --git a/pytorch_lightning/core/__init__.py b/pytorch_lightning/core/__init__.py index 9b20b562865b5..b737caab2bf63 100644 --- a/pytorch_lightning/core/__init__.py +++ b/pytorch_lightning/core/__init__.py @@ -152,7 +152,6 @@ # # """ -from .test_b import TestB from .lightning import LightningModule -__all__ = ['TestB'] +__all__ = ['LightningModule'] diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index fdaba821afce1..74b17ece42106 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -4,6 +4,7 @@ import warnings import collections import logging +import pandas as pd from abc import ABC, abstractmethod from argparse import Namespace @@ -15,7 +16,6 @@ from pytorch_lightning.core.hooks import ModelHooks from pytorch_lightning.core.saving import ModelIO from pytorch_lightning.core.memory import ModelSummary -from pytorch_lightning.trainer.training_io import load_hparams_from_tags_csv from pytorch_lightning.overrides.data_parallel import LightningDistributedDataParallel @@ -1095,3 +1095,32 @@ def 
on_save_checkpoint(self, checkpoint): """ pass + + +def load_hparams_from_tags_csv(tags_csv): + if not os.path.isfile(tags_csv): + logging.warning(f'Missing Tags: {tags_csv}.') + return Namespace() + + tags_df = pd.read_csv(tags_csv) + dic = tags_df.to_dict(orient='records') + ns_dict = {row['key']: convert(row['value']) for row in dic} + ns = Namespace(**ns_dict) + return ns + + +def convert(val): + constructors = [int, float, str] + + if type(val) is str: + if val.lower() == 'true': + return True + if val.lower() == 'false': + return False + + for c in constructors: + try: + return c(val) + except ValueError: + pass + return val diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py index 91c77fea78d39..168e983585c2a 100644 --- a/pytorch_lightning/trainer/training_io.py +++ b/pytorch_lightning/trainer/training_io.py @@ -96,9 +96,7 @@ from subprocess import call import logging from abc import ABC -from argparse import Namespace -import pandas as pd import torch import torch.distributed as dist @@ -459,32 +457,3 @@ def max_ckpt_in_folder(self, path, name_key='ckpt_'): ckpt_vs.append(int(name)) return max(ckpt_vs) - - -def load_hparams_from_tags_csv(tags_csv): - if not os.path.isfile(tags_csv): - logging.warning(f'Missing Tags: {tags_csv}.') - return Namespace() - - tags_df = pd.read_csv(tags_csv) - dic = tags_df.to_dict(orient='records') - ns_dict = {row['key']: convert(row['value']) for row in dic} - ns = Namespace(**ns_dict) - return ns - - -def convert(val): - constructors = [int, float, str] - - if type(val) is str: - if val.lower() == 'true': - return True - if val.lower() == 'false': - return False - - for c in constructors: - try: - return c(val) - except ValueError: - pass - return val diff --git a/tests/test_trainer.py b/tests/test_trainer.py index 90430e5c01ff4..fca75c3fb12b3 100644 --- a/tests/test_trainer.py +++ b/tests/test_trainer.py @@ -15,7 +15,7 @@ LightningValidationMultipleDataloadersMixin, LightningTestMultipleDataloadersMixin, ) -from pytorch_lightning.trainer import training_io +from pytorch_lightning.core.lightning import load_hparams_from_tags_csv from pytorch_lightning.trainer.logging import TrainerLoggingMixin @@ -186,7 +186,7 @@ def test_loading_meta_tags(tmpdir): # load tags path_expt_dir = tutils.get_data_path(logger, path_dir=tmpdir) tags_path = os.path.join(path_expt_dir, 'meta_tags.csv') - tags = training_io.load_hparams_from_tags_csv(tags_path) + tags = load_hparams_from_tags_csv(tags_path) assert tags.batch_size == 32 and tags.hidden_dim == 1000 From 6b86754bbf801edcc9b50f42b8519cee8901d1c1 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 15:11:17 -0500 Subject: [PATCH 050/155] cleared spaces --- pytorch_lightning/core/__init__.py | 175 +++++++++++++++++++++------- pytorch_lightning/core/lightning.py | 72 +++--------- 2 files changed, 151 insertions(+), 96 deletions(-) diff --git a/pytorch_lightning/core/__init__.py b/pytorch_lightning/core/__init__.py index b737caab2bf63..56d6a2582953e 100644 --- a/pytorch_lightning/core/__init__.py +++ b/pytorch_lightning/core/__init__.py @@ -1,146 +1,239 @@ """ -Test +A LightningModule is a strict superclass of torch.nn.Module but provides an interface to standardize +the "ingredients" for a research or production system. 
+ +- The model/system definition (__init__) +- The model/system computations (forward) +- What happens in the training loop (training_step, training_end) +- What happens in the validation loop (validation_step, validation_end) +- What happens in the test loop (test_step, test_end) +- What optimizers to use (configure_optimizers) +- What data to use (train_dataloader, val_dataloader, test_dataloader) + +Most methods are optional. Here's a minimal example. + +.. code-block:: python + + import os + import torch + from torch.nn import functional as F + from torch.utils.data import DataLoader + from torchvision.datasets import MNIST + import torchvision.transforms as transforms + + import pytorch_lightning as pl + + class CoolModel(pl.LightningModule): + + def __init__(self): + super(CoolModel, self).__init__() + self.l1 = torch.nn.Linear(28 * 28, 10) + + def forward(self, x): + return torch.relu(self.l1(x.view(x.size(0), -1))) + + def training_step(self, batch, batch_idx): + x, y = batch + y_hat = self.forward(x) + return {'loss': F.cross_entropy(y_hat, y)} + + def validation_step(self, batch, batch_idx): + # OPTIONAL + x, y = batch + y_hat = self.forward(x) + return {'val_loss': F.cross_entropy(y_hat, y)} + + def validation_end(self, outputs): + # OPTIONAL + val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() + return {'val_loss': val_loss_mean} + + def test_step(self, batch, batch_idx): + # OPTIONAL + x, y = batch + y_hat = self.forward(x) + return {'test_loss': F.cross_entropy(y_hat, y)} + + def test_end(self, outputs): + # OPTIONAL + test_loss_mean = torch.stack([x['test_loss'] for x in outputs]).mean() + return {'test_loss': test_loss_mean} + + def configure_optimizers(self): + # REQUIRED + return torch.optim.Adam(self.parameters(), lr=0.02) + + @pl.data_loader + def train_dataloader(self): + return DataLoader(MNIST(os.getcwd(), train=True, download=True, + transform=transforms.ToTensor()), batch_size=32) + + @pl.data_loader + def val_dataloader(self): + # OPTIONAL + # can also return a list of val dataloaders + return DataLoader(MNIST(os.getcwd(), train=True, download=True, + transform=transforms.ToTensor()), batch_size=32) + + @pl.data_loader + def test_dataloader(self): + # OPTIONAL + # can also return a list of test dataloaders + return DataLoader(MNIST(os.getcwd(), train=False, download=True, + transform=transforms.ToTensor()), batch_size=32) + +Once you've defined the LightningModule, fit it using a trainer. + +.. code-block:: python + trainer = pl.Trainer() + model = CoolModel() + + trainer.fit(model) + +Check out this `COLAB `_ +for a live demo. + """ # """ # Lightning Module interface # ========================== -# +# # A lightning module is a strict superclass of nn.Module, it provides a standard interface # for the trainer to interact with the model. -# +# # The easiest thing to do is copy the minimal example below and modify accordingly. -# +# # Otherwise, to Define a Lightning Module, implement the following methods: -# -# +# +# # Minimal example # --------------- -# +# # .. code-block:: python -# +# # import os # import torch # from torch.nn import functional as F # from torch.utils.data import DataLoader # from torchvision.datasets import MNIST # import torchvision.transforms as transforms -# +# # import pytorch_lightning as pl -# +# # class CoolModel(pl.LightningModule): -# +# # def __init__(self): # super(CoolModel, self).__init__() # # not the best model... 
# self.l1 = torch.nn.Linear(28 * 28, 10) -# +# # def forward(self, x): # return torch.relu(self.l1(x.view(x.size(0), -1))) -# +# # def training_step(self, batch, batch_idx): # # REQUIRED # x, y = batch # y_hat = self.forward(x) # return {'loss': F.cross_entropy(y_hat, y)} -# +# # def validation_step(self, batch, batch_idx): # # OPTIONAL # x, y = batch # y_hat = self.forward(x) # return {'val_loss': F.cross_entropy(y_hat, y)} -# +# # def validation_end(self, outputs): # # OPTIONAL # val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() # return {'val_loss': val_loss_mean} -# +# # def test_step(self, batch, batch_idx): # # OPTIONAL # x, y = batch # y_hat = self.forward(x) # return {'test_loss': F.cross_entropy(y_hat, y)} -# +# # def test_end(self, outputs): # # OPTIONAL # test_loss_mean = torch.stack([x['test_loss'] for x in outputs]).mean() # return {'test_loss': test_loss_mean} -# +# # def configure_optimizers(self): # # REQUIRED # return torch.optim.Adam(self.parameters(), lr=0.02) -# +# # @pl.data_loader # def train_dataloader(self): # return DataLoader(MNIST(os.getcwd(), train=True, download=True, # transform=transforms.ToTensor()), batch_size=32) -# +# # @pl.data_loader # def val_dataloader(self): # # OPTIONAL # # can also return a list of val dataloaders # return DataLoader(MNIST(os.getcwd(), train=True, download=True, # transform=transforms.ToTensor()), batch_size=32) -# +# # @pl.data_loader # def test_dataloader(self): # # OPTIONAL # # can also return a list of test dataloaders # return DataLoader(MNIST(os.getcwd(), train=False, download=True, # transform=transforms.ToTensor()), batch_size=32) -# -# +# +# # How do these methods fit into the broader training? # --------------------------------------------------- -# +# # The LightningModule interface is on the right. Each method corresponds # to a part of a research project. Lightning automates everything not in blue. -# +# # .. figure:: docs/source/_static/images/overview_flat.jpg # :align: center -# +# # Overview. -# -# +# +# # Optional Methods # ---------------- -# +# # **add_model_specific_args** -# +# # .. code-block:: python -# +# # @staticmethod # def add_model_specific_args(parent_parser, root_dir) -# +# # Lightning has a list of default argparse commands. # This method is your chance to add or modify commands specific to your model. # The `hyperparameter argument parser # `_ # is available anywhere in your model by calling self.hparams. -# +# # **Return** # An argument parser -# +# # **Example** -# +# # .. 
code-block:: python -# +# # @staticmethod # def add_model_specific_args(parent_parser, root_dir): # parser = HyperOptArgumentParser(strategy=parent_parser.strategy, parents=[parent_parser]) -# +# # # param overwrites # # parser.set_defaults(gradient_clip_val=5.0) -# +# # # network params # parser.opt_list('--drop_prob', default=0.2, options=[0.2, 0.5], type=float, tunable=False) # parser.add_argument('--in_features', default=28*28) # parser.add_argument('--out_features', default=10) # # use 500 for CPU, 50000 for GPU to see speed difference # parser.add_argument('--hidden_dim', default=50000) -# +# # # data # parser.add_argument('--data_root', default=os.path.join(root_dir, 'mnist'), type=str) -# +# # # training params (opt) # parser.opt_list('--learning_rate', default=0.001, type=float, # options=[0.0001, 0.0005, 0.001, 0.005], tunable=False) @@ -149,7 +242,7 @@ # parser.opt_list('--optimizer_name', default='adam', type=str, # options=['adam'], tunable=False) # return parser -# +# # """ from .lightning import LightningModule diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 74b17ece42106..7328e57bc6310 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -10,7 +10,7 @@ import torch import torch.distributed as dist -# +# from pytorch_lightning.core.decorators import data_loader from pytorch_lightning.core.grads import GradInformation from pytorch_lightning.core.hooks import ModelHooks @@ -20,81 +20,43 @@ class LightningModule(ABC, GradInformation, ModelIO, ModelHooks): - # """ - # A LightningModule has the following properties which you can access at any time - # - # **logger** - # A reference to the logger you passed into trainer. - # Passing a logger is optional. If you don't pass one in, Lightning will create one - # for you automatically. This logger saves logs to `/os.getcwd()/lightning_logs`:: - # - # Trainer(logger=your_logger) - # - # - # Call it from anywhere in your LightningModule to add metrics, images, etc... - # whatever your logger supports. - # - # Here is an example using the TestTubeLogger (which is a wrapper - # on 'PyTorch SummaryWriter `_ - # with versioned folder structure). - # - # .. code-block:: python - # - # # if logger is a tensorboard logger or TestTubeLogger - # self.logger.experiment.add_embedding(...) - # self.logger.experiment.log({'val_loss': 0.9}) - # self.logger.experiment.add_scalars(...) - # - # - # **trainer** - # Last resort access to any state the trainer has. - # Changing certain properties here could affect your training run. - # - # .. code-block:: python - # - # self.trainer.optimizers - # self.trainer.current_epoch - # ... - # - # Debugging - # --------- - # - # The LightningModule also offers these tricks to help debug. - # - # **example_input_array** - # - # In the LightningModule init, you can set a dummy tensor for this property - # to get a print out of sizes coming into and out of every layer. - # - # .. 
code-block:: python - # - # def __init__(self): - # # put the dimensions of the first input to your system - # self.example_input_array = torch.rand(5, 28 * 28) - # """ - def __init__(self, *args, **kwargs): super(LightningModule, self).__init__(*args, **kwargs) #: Current dtype self.dtype = torch.FloatTensor + self.exp_save_path = None + #: The current epoch self.current_epoch = 0 + #: Total training batches seen across all epochs self.global_step = 0 + self.loaded_optimizer_states_dict = {} + + #: Pointer to the trainer object self.trainer = None + + #: Pointer to the logger object self.logger = None self.example_input_array = None - # track if gpu was requested for checkpointing #: True if your model is currently running on GPUs. #: Useful to set flags around the LightningModule for different CPU vs GPU behavior. self.on_gpu = False + + #: True if using dp self.use_dp = False + + #: True if using ddp self.use_ddp = False + + #: True if using ddp2 self.use_ddp2 = False + + #: True if using amp self.use_amp = False @abstractmethod From f1024a63e5bce2c234723e0ac1b4093a1319964d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 15:12:09 -0500 Subject: [PATCH 051/155] cleared spaces --- pytorch_lightning/core/__init__.py | 151 ----------------------------- 1 file changed, 151 deletions(-) diff --git a/pytorch_lightning/core/__init__.py b/pytorch_lightning/core/__init__.py index 56d6a2582953e..8a4223448f48a 100644 --- a/pytorch_lightning/core/__init__.py +++ b/pytorch_lightning/core/__init__.py @@ -94,157 +94,6 @@ def test_dataloader(self): for a live demo. """ -# """ -# Lightning Module interface -# ========================== -# -# A lightning module is a strict superclass of nn.Module, it provides a standard interface -# for the trainer to interact with the model. -# -# The easiest thing to do is copy the minimal example below and modify accordingly. -# -# Otherwise, to Define a Lightning Module, implement the following methods: -# -# -# Minimal example -# --------------- -# -# .. code-block:: python -# -# import os -# import torch -# from torch.nn import functional as F -# from torch.utils.data import DataLoader -# from torchvision.datasets import MNIST -# import torchvision.transforms as transforms -# -# import pytorch_lightning as pl -# -# class CoolModel(pl.LightningModule): -# -# def __init__(self): -# super(CoolModel, self).__init__() -# # not the best model... 
-# self.l1 = torch.nn.Linear(28 * 28, 10) -# -# def forward(self, x): -# return torch.relu(self.l1(x.view(x.size(0), -1))) -# -# def training_step(self, batch, batch_idx): -# # REQUIRED -# x, y = batch -# y_hat = self.forward(x) -# return {'loss': F.cross_entropy(y_hat, y)} -# -# def validation_step(self, batch, batch_idx): -# # OPTIONAL -# x, y = batch -# y_hat = self.forward(x) -# return {'val_loss': F.cross_entropy(y_hat, y)} -# -# def validation_end(self, outputs): -# # OPTIONAL -# val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean() -# return {'val_loss': val_loss_mean} -# -# def test_step(self, batch, batch_idx): -# # OPTIONAL -# x, y = batch -# y_hat = self.forward(x) -# return {'test_loss': F.cross_entropy(y_hat, y)} -# -# def test_end(self, outputs): -# # OPTIONAL -# test_loss_mean = torch.stack([x['test_loss'] for x in outputs]).mean() -# return {'test_loss': test_loss_mean} -# -# def configure_optimizers(self): -# # REQUIRED -# return torch.optim.Adam(self.parameters(), lr=0.02) -# -# @pl.data_loader -# def train_dataloader(self): -# return DataLoader(MNIST(os.getcwd(), train=True, download=True, -# transform=transforms.ToTensor()), batch_size=32) -# -# @pl.data_loader -# def val_dataloader(self): -# # OPTIONAL -# # can also return a list of val dataloaders -# return DataLoader(MNIST(os.getcwd(), train=True, download=True, -# transform=transforms.ToTensor()), batch_size=32) -# -# @pl.data_loader -# def test_dataloader(self): -# # OPTIONAL -# # can also return a list of test dataloaders -# return DataLoader(MNIST(os.getcwd(), train=False, download=True, -# transform=transforms.ToTensor()), batch_size=32) -# -# -# How do these methods fit into the broader training? -# --------------------------------------------------- -# -# The LightningModule interface is on the right. Each method corresponds -# to a part of a research project. Lightning automates everything not in blue. -# -# .. figure:: docs/source/_static/images/overview_flat.jpg -# :align: center -# -# Overview. -# -# -# Optional Methods -# ---------------- -# -# **add_model_specific_args** -# -# .. code-block:: python -# -# @staticmethod -# def add_model_specific_args(parent_parser, root_dir) -# -# Lightning has a list of default argparse commands. -# This method is your chance to add or modify commands specific to your model. -# The `hyperparameter argument parser -# `_ -# is available anywhere in your model by calling self.hparams. -# -# **Return** -# An argument parser -# -# **Example** -# -# .. 
code-block:: python -# -# @staticmethod -# def add_model_specific_args(parent_parser, root_dir): -# parser = HyperOptArgumentParser(strategy=parent_parser.strategy, parents=[parent_parser]) -# -# # param overwrites -# # parser.set_defaults(gradient_clip_val=5.0) -# -# # network params -# parser.opt_list('--drop_prob', default=0.2, options=[0.2, 0.5], type=float, tunable=False) -# parser.add_argument('--in_features', default=28*28) -# parser.add_argument('--out_features', default=10) -# # use 500 for CPU, 50000 for GPU to see speed difference -# parser.add_argument('--hidden_dim', default=50000) -# -# # data -# parser.add_argument('--data_root', default=os.path.join(root_dir, 'mnist'), type=str) -# -# # training params (opt) -# parser.opt_list('--learning_rate', default=0.001, type=float, -# options=[0.0001, 0.0005, 0.001, 0.005], tunable=False) -# parser.opt_list('--batch_size', default=256, type=int, -# options=[32, 64, 128, 256], tunable=False) -# parser.opt_list('--optimizer_name', default='adam', type=str, -# options=['adam'], tunable=False) -# return parser -# -# """ - from .lightning import LightningModule __all__ = ['LightningModule'] From 891b2c075d0ca6b92c4964cc2c166ceba0bd8830 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 15:36:40 -0500 Subject: [PATCH 052/155] cleared spaces --- pytorch_lightning/core/lightning.py | 125 ++++++++++++++++++++-------- 1 file changed, 91 insertions(+), 34 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 7328e57bc6310..ee7c9e85f0287 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -61,11 +61,52 @@ def __init__(self, *args, **kwargs): @abstractmethod def forward(self, *args, **kwargs): - """ - Expand model in into whatever you need. - Also need to return the target - :param x: - :return: + r""" + Same as torch.nn.Module.forward(), however in Lightning you want this to define + the operations you want to use for prediction (ie: on a server or as a feature extractor). + + Normally you'd call self.forward() from your training_step() method. This makes it easy to write a complex + system for training with the outputs you'd want in a prediction setting. + + Args: + x (tensor): Whatever you decide to define in the forward method + + Return: + Predicted output + + Example + ------- + + .. code-block:: python + + # example if we were using this model as a feature extractor + def forward(self, x): + feature_maps = self.convnet(x) + return feature_maps + + def training_step(self, batch, batch_idx): + x, y = batch + feature_maps = self.forward(x) + logits = self.classifier(feature_maps) + + # ... + return loss + + # splitting it this way allows you to use your model as a feature extractor now + model = MyModelAbove() + + inputs = server.get_request() + results = model(inputs) + server.write_results(results) + + # ------------- + # This is in stark contrast to torch.nn.Module where normally you would have this: + def forward(self, batch): + x, y = batch + feature_maps = self.convnet(x) + logits = self.classifier(feature_maps) + return logits + """ @abstractmethod @@ -484,20 +525,27 @@ def test_end(self, outputs): pass def configure_ddp(self, model, device_ids): - """Override to init DDP in a different way or use your own wrapper. + r""" - :param model: - :param device_ids: - :return: DDP wrapped model + Override to init DDP in your own way or with your own wrapper. 
+ The only requirements are that: - Overwrite to define your own DDP implementation init. - The only requirement is that: 1. On a validation batch the call goes to model.validation_step. 2. On a training batch the call goes to model.training_step. 3. On a testing batch, the call goes to model.test_step + Args: + model (LightningModule): the LightningModule currently being optimized + device_ids (list): the list of GPU ids + + Return: + DDP wrapped model + + Example + ------- .. code-block:: python + # default implementation used in Trainer def configure_ddp(self, model, device_ids): # Lightning DDP simply routes to test_step, val_step, etc... model = LightningDistributedDataParallel( @@ -585,19 +633,24 @@ def init_ddp_connection(self): dist.init_process_group('nccl', rank=proc_rank, world_size=world_size) def configure_apex(self, amp, model, optimizers, amp_level): - """ + r""" Override to init AMP your own way Must return a model and list of optimizers - :param amp: - :param model: - :param optimizers: - :param amp_level: - :return: Apex wrapped model and optimizers - Overwrite to define your own Apex implementation init. + Args: + amp (object): pointer to amp library object + model (LightningModule): pointer to current lightningModule + optimizers (list): list of optimizers passed in configure_optimizers() + amp_level (str): AMP mode chosen ('O1', 'O2', etc...) + Return: + Apex wrapped model and optimizers + + Example + ------- .. code-block:: python + # Default implementation used by Trainer. def configure_apex(self, amp, model, optimizers, amp_level): model, optimizers = amp.initialize( model, optimizers, opt_level=amp_level, @@ -613,25 +666,16 @@ def configure_apex(self, amp, model, optimizers, amp_level): @abstractmethod def configure_optimizers(self): - """Return a list of optimizers and a list of schedulers (could be empty) + r""" - :return: any of these 3 options: + This is where you choose what optimizers and learning-rate schedulers to use in your optimization. + Normally you'd need one. But in the case of GANs or something more esoteric you might have multiple. + + Return: any of these 3 options: - Single optimizer - List or Tuple - List of optimizers - Two lists - The first list has multiple optimizers, the second a list of learning-rate schedulers - Set up as many optimizers and (optionally) learning rate schedulers as you need. - Normally you'd need one. But in the case of GANs or something more esoteric you might have multiple. - Lightning will call .backward() and .step() on each one in every epoch. - If you use 16 bit precision it will also handle that. - - .. note:: If you use multiple optimizers, training_step will have an additional `optimizer_idx` parameter. - - .. note:: If you use LBFGS lightning handles the closure function automatically for you - - .. note:: If you use multiple optimizers, gradients will be calculated only - for the parameters of current optimizer at each training step. - Example ------- @@ -655,8 +699,21 @@ def configure_optimizers(self): discriminator_sched = CosineAnnealing(discriminator_opt, T_max=10) return [generator_opt, disriminator_opt], [discriminator_sched] - If you need to control how often those optimizers step or override the default .step() schedule, - override the `optimizer_step` hook. + .. note:: Lightning calls .backward() and .step() on each optimizer and learning rate scheduler as needed. + + .. note:: If you use 16-bit precision (use_amp=True), Lightning will automatically + handle the optimizers for you. + + .. 
note:: If you use multiple optimizers, training_step will have an additional `optimizer_idx` parameter. + + .. note:: If you use LBFGS lightning handles the closure function automatically for you + + .. note:: If you use multiple optimizers, gradients will be calculated only + for the parameters of current optimizer at each training step. + + .. note:: If you need to control how often those optimizers step or override the default .step() schedule, + override the `optimizer_step` hook. + """ From 16c92eaa56b50766fb7c177546d990fc1b95a9c5 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:08:03 -0500 Subject: [PATCH 053/155] cleared spaces --- pytorch_lightning/core/lightning.py | 131 ++++++++++++++++++++-------- 1 file changed, 94 insertions(+), 37 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index ee7c9e85f0287..9d12ac8185630 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -92,7 +92,7 @@ def training_step(self, batch, batch_idx): # ... return loss - # splitting it this way allows you to use your model as a feature extractor now + # splitting it this way allows model to be used a feature extractor model = MyModelAbove() inputs = server.get_request() @@ -565,11 +565,17 @@ def configure_ddp(self, model, device_ids): return model def init_ddp_connection(self, proc_rank, world_size): - """Connect all procs in the world using the env:// init - Use the first node as the root address + r""" + + Override to define your custom way of setting up a distributed environment. - Override to init DDP in your own way. + Lightning's implementation uses env:// init by default and sets the first node as root. + Args: + proc_rank (int): The current process rank within the node. + world_size (int): Number of GPUs being use across all nodes. (num_nodes*nb_gpu_nodes). + Example + ------- .. code-block:: python def init_ddp_connection(self): @@ -600,7 +606,11 @@ def init_ddp_connection(self): root_node = self.trainer.resolve_root_node_address(root_node) os.environ['MASTER_ADDR'] = root_node - dist.init_process_group('nccl', rank=self.proc_rank, world_size=self.world_size) + dist.init_process_group( + 'nccl', + rank=self.proc_rank, + world_size=self.world_size + ) """ # use slurm job id for the port number @@ -945,29 +955,41 @@ def val_dataloader(self): return [loader_a, loader_b, ..., loader_n] In the case where you return multiple `val_dataloaders`, the `validation_step` - will have an arguement `dataset_idx` which matches the order here. + will have an argument `dataset_idx` which matches the order here. """ return None @classmethod def load_from_metrics(cls, weights_path, tags_csv, map_location=None): - """Primary way of loading model from csv weights path. + r""" - :param str weights_path: Path to a PyTorch checkpoint - :param str tags_csv: Path to meta_tags.csv file generated by the test-tube Experiment - :param dict map_location: A dictionary mapping saved weight GPU devices to new GPU devices - for mapping storage {'cuda:1':'cuda:0'} - :return: The pretrained LightningModule + You should use `load_from_checkpoint` instead! + However, if your .ckpt weights don't have the hyperparameters saved, use this method to pass + in a .csv with the hparams you'd like to use. These will be converted into a argparse.Namespace + and passed into your LightningModule for use. - If you're using `test-tube`, there is an alternate method which uses the meta_tags.csv - file from test-tube to rebuild the model. 
The `meta_tags.csv` file can be found in the - `test-tube` experiment save_dir. + Args: + + weights_path (str): Path to a PyTorch checkpoint + tags_csv (str): Path to a .csv with two columns (key, value) as in this + Example:: + key,value + drop_prob,0.2 + batch_size,32 + + map_location (dict): A dictionary mapping saved weight GPU devices to new + GPU devices (example: {'cuda:1':'cuda:0'}) + Return: + LightningModule with loaded weights + + Example + ------- .. code-block:: python pretrained_model = MyLightningModule.load_from_metrics( weights_path='/path/to/pytorch_checkpoint.ckpt', - tags_csv='/path/to/test_tube/experiment/version/meta_tags.csv', + tags_csv='/path/to/hparams_file.csv', on_gpu=True, map_location=None ) @@ -976,22 +998,8 @@ def load_from_metrics(cls, weights_path, tags_csv, map_location=None): pretrained_model.eval() pretrained_model.freeze() y_hat = pretrained_model(x) - - This is the easiest/fastest way which loads hyperparameters and weights from a checkpoint, - such as the one saved by the `ModelCheckpoint` callback - - .. code-block:: python - - pretrained_model = MyLightningModule.load_from_checkpoint( - checkpoint_path='/path/to/pytorch_checkpoint.ckpt' - ) - - # predict - pretrained_model.eval() - pretrained_model.freeze() - y_hat = pretrained_model(x) - """ + hparams = load_hparams_from_tags_csv(tags_csv) hparams.__setattr__('on_gpu', False) @@ -1011,11 +1019,56 @@ def load_from_metrics(cls, weights_path, tags_csv, map_location=None): @classmethod def load_from_checkpoint(cls, checkpoint_path, map_location=None): - """ - Primary way of loading model from a checkpoint - :param checkpoint_path: - :param map_location: dic for mapping storage {'cuda:1':'cuda:0'} - :return: + r""" + + Primary way of loading model from a checkpoint. When Lightning saves a checkpoint + it stores the hyperparameters in the checkpoint if you initialized your LightningModule + with an argument called `hparams` which is a Namespace or dictionary of hyperparameters + + Example + ------- + .. code-block:: python + + # -------------- + # Case 1 + # when using Namespace (output of using Argparse to parse command line arguments) + from argparse import Namespace + hparams = Namespace(**{'learning_rate': 0.1}) + + model = MyModel(hparams) + + class MyModel(pl.LightningModule): + def __init__(self, hparams): + self.learning_rate = hparams.learning_rate + + # -------------- + # Case 2 + # when using a dict + model = MyModel({'learning_rate': 0.1}) + + class MyModel(pl.LightningModule): + def __init__(self, hparams): + self.learning_rate = hparams['learning_rate'] + + Args: + checkpoint_path (str): Path to checkpoint. + map_location (dic): If your checkpoint saved from a GPU model and you now load on CPUs + or a different number of GPUs, use this to map to the new setup. + + Return: + LightningModule with loaded weights. + + Example + ------- + .. code-block:: python + + # load weights without mapping + MyLightningModule.load_from_checkpoint('path/to/checkpoint.ckpt') + + # load weights mapping all weights from GPU 1 to GPU 0 + map_location = {'cuda:1':'cuda:0'} + MyLightningModule.load_from_checkpoint('path/to/checkpoint.ckpt', map_location=map_location) + """ if map_location is not None: @@ -1046,8 +1099,12 @@ def summarize(self, mode): logging.info('\n' + model_summary.__str__()) def freeze(self): - """Freeze all params for inference + r""" + Freeze all params for inference + + Example + ------- .. code-block:: python model = MyLightningModule(...) 
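            # minimal usage sketch only: the class name, checkpoint path and input
            # below are assumed placeholders, not values taken from this patch
            model = MyLightningModule.load_from_checkpoint('path/to/checkpoint.ckpt')
            model.freeze()

            # with parameters frozen the module can be used purely for inference
            y_hat = model(some_input_tensor)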
From 0513808fef5d6395a3c9da8f0ff14b7333aed057 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:20:03 -0500 Subject: [PATCH 054/155] cleared spaces --- pytorch_lightning/core/lightning.py | 50 ++++++++++++++++------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 9d12ac8185630..dac7994c60f71 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -728,20 +728,20 @@ def configure_optimizers(self): """ def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx, second_order_closure=None): - """Do something instead of the standard optimizer behavior - - :param int epoch: - :param int batch_idx: - :param optimizer: - :param optimizer_idx: - :param second_order_closure: closure for second order methods - :return: + r""" - Calls `.step()` and `.zero_grad` for each optimizer. - You can override this method to adjust how you do the optimizer step for each optimizer + Override this method to adjust the default way the Trainer calls each optimizer. By default, Lightning + calls .step() and zero_grad() as shown in the example once per optimizer. - Called once per optimizer + Args: + epoch (int): Current epoch + batch_idx (int): Index of current batch + optimizer (torch.nn.Optimizer): A PyTorch optimizer + optimizer_idx (int): If you used multiple optimizers this indexes into that list + second_order_closure (int): closure for second order methods + Example + ------- .. code-block:: python # DEFAULT @@ -767,7 +767,7 @@ def optimizer_step(self, current_epoch, batch_idx, optimizer, optimizer_idx, sec # add as many optimizers as you want - This step allows you to do a lot of non-standard training tricks such as learning-rate warm-up: + Here's another example showing how to use this for more advanced things such as learning-rate warm-up: .. code-block:: python @@ -1131,13 +1131,14 @@ def unfreeze(self): self.train() def on_load_checkpoint(self, checkpoint): - """ + r""" + + Called by lightning to restore your model. + If you saved something with **on_save_checkpoint** this is your chance to restore this. - :param checkpoint: + Args: + checkpoint (dict): Loaded checkpoint - Called by lightning to restore your model. Lighting auto-restores global step, epoch, etc... - It also restores the model state_dict. - If you saved something with **on_save_checkpoint** this is your chance to restore this. Example ------- @@ -1148,17 +1149,19 @@ def on_load_checkpoint(self, checkpoint): # 99% of the time you don't need to implement this method self.something_cool_i_want_to_save = checkpoint['something_cool_i_want_to_save'] + .. note:: Lighting auto-restores global step, epoch, and all training state including amp scaling. + No need for you to restore anything regarding training. """ pass def on_save_checkpoint(self, checkpoint): - """ + r""" - :param checkpoint: + Called by lightning when saving a checkpoint to give you a chance to store anything else you + might want to save - Called by lightning to checkpoint your model. Lightning saves the training state - (current epoch, global_step, etc) and also saves the model state_dict. - If you want to save anything else, use this method to add your own key-value pair. 
+ Args: + checkpoint (dic): Checkpoint to be saved Example ------- @@ -1169,6 +1172,9 @@ def on_save_checkpoint(self, checkpoint): # 99% of use cases you don't need to implement this method checkpoint['something_cool_i_want_to_save'] = my_cool_pickable_object + .. note:: Lighting saves all aspects of training (epoch, global step, etc...) including amp scaling. No need + for you to store anything about training. + """ pass From a23e8bd0aeabd0adf7f4175d2dd34c99660e73be Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:23:08 -0500 Subject: [PATCH 055/155] cleared spaces --- pytorch_lightning/core/lightning.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index dac7994c60f71..8c57cbf77ad62 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -793,18 +793,25 @@ def optimizer_step(self, current_epoch, batch_idx, optimizer, optimizer_idx, sec optimizer.zero_grad() def tbptt_split_batch(self, batch, split_size): - """ - Return list of batch splits. Each split will be passed to forward_step to enable truncated - back propagation through time. The default implementation splits root level Tensors and - Sequences at dim=1 (i.e. time dim). It assumes that each time dim is the same length. + r""" + + When using truncated backpropagation through time, each batch must be split along the time dimension. + Lightning handles this by default, but for custom behavior override this function. + + Args: + batch (torch.nn.Tensor): Current batch + split_size (int): How big the split is - :param batch: - :param split_size: - :return: + .. note:: Called in the training loop after on_batch_start if `truncated_bptt_steps > 0`. + Each returned batch split is passed separately to training_step(...). - Called in the training loop after on_batch_start if `truncated_bptt_steps > 0`. - Each returned batch split is passed separately to training_step(...). + Return: + list of batch splits. Each split will be passed to forward_step to enable truncated + back propagation through time. The default implementation splits root level Tensors and + Sequences at dim=1 (i.e. time dim). It assumes that each time dim is the same length. + Example + ------- .. code-block:: python def tbptt_split_batch(self, batch, split_size): From 0c38bd6fc250b1c66b0cbb696b09ca73f88a6475 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:23:33 -0500 Subject: [PATCH 056/155] cleared spaces --- pytorch_lightning/core/lightning.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 8c57cbf77ad62..94ce93846b434 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -802,9 +802,6 @@ def tbptt_split_batch(self, batch, split_size): batch (torch.nn.Tensor): Current batch split_size (int): How big the split is - .. note:: Called in the training loop after on_batch_start if `truncated_bptt_steps > 0`. - Each returned batch split is passed separately to training_step(...). - Return: list of batch splits. Each split will be passed to forward_step to enable truncated back propagation through time. The default implementation splits root level Tensors and @@ -831,6 +828,10 @@ def tbptt_split_batch(self, batch, split_size): splits.append(batch_split) return splits + + .. 
note:: Called in the training loop after on_batch_start if `truncated_bptt_steps > 0`. + Each returned batch split is passed separately to training_step(...). + """ time_dims = [len(x[0]) for x in batch if isinstance( x, torch.Tensor) or isinstance(x, collections.Sequence)] From b4bf33852ce70a34442035d95427a008dba121a8 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:25:25 -0500 Subject: [PATCH 057/155] cleared spaces --- pytorch_lightning/core/lightning.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 94ce93846b434..1b22ebfe29d78 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -898,15 +898,13 @@ def tng_dataloader(self): @data_loader def test_dataloader(self): - """Implement a PyTorch DataLoader. - - :return: PyTorch DataLoader - - If you don't need a test dataset and a test_step, you don't need to implement this method. + r""" Called by lightning during test loop. Make sure to use the @pl.data_loader decorator, - this ensures not calling this function until the data are needed. - If you want to change the data during every epoch DON'T use the data_loader decorator. + this ensures not calling this function until the data are needed. + + Return: + PyTorch DataLoader Example ------- @@ -925,6 +923,10 @@ def test_dataloader(self): return loader + .. note:: If you don't need a test dataset and a test_step, you don't need to implement this method. + + .. note:: If you want to change the data during every epoch DON'T use the data_loader decorator. + """ return None From d27ff46920da80bcb38d8fc3d6e68e13250faa07 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:33:58 -0500 Subject: [PATCH 058/155] cleared spaces --- pytorch_lightning/core/decorators.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pytorch_lightning/core/decorators.py b/pytorch_lightning/core/decorators.py index 0a87e00f57fc7..aeea1a7e44256 100644 --- a/pytorch_lightning/core/decorators.py +++ b/pytorch_lightning/core/decorators.py @@ -1,4 +1,5 @@ import traceback +from functools import wraps def data_loader(fn): @@ -8,6 +9,7 @@ def data_loader(fn): :return: """ + wraps(fn) attr_name = '_lazy_' + fn.__name__ def _get_data_loader(self): From de32a72c97729e14c9c0774826930c66c1b1b99c Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:36:33 -0500 Subject: [PATCH 059/155] cleared spaces --- pytorch_lightning/core/lightning.py | 40 +++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 1b22ebfe29d78..273c3ffe52a67 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -932,15 +932,35 @@ def test_dataloader(self): @data_loader def val_dataloader(self): - """Implement a PyTorch DataLoader. + r""" - :return: PyTorch DataLoader or list of PyTorch Dataloaders. + Called by lightning during validation loop. Make sure to use the @pl.data_loader decorator, + this ensures not calling this function until the data are needed. - If you don't need a validation dataset and a validation_step, you don't need to implement this method. + Return: + PyTorch DataLoader - Called by lightning during validation loop. Make sure to use the @pl.data_loader decorator, - this ensures not calling this function until the data are needed. 
- If you want to change the data during every epoch DON'T use the data_loader decorator. + Example + ------- + + .. code-block:: python + + @pl.data_loader + def val_dataloader(self): + transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))]) + dataset = MNIST(root='/path/to/mnist/', train=False, transform=transform, download=True) + loader = torch.utils.data.DataLoader( + dataset=dataset, + batch_size=self.hparams.batch_size, + shuffle=True + ) + + return loader + + # can also return multiple dataloaders + @pl.data_loader + def val_dataloader(self): + return [loader_a, loader_b, ..., loader_n] Example ------- @@ -964,8 +984,12 @@ def val_dataloader(self): def val_dataloader(self): return [loader_a, loader_b, ..., loader_n] - In the case where you return multiple `val_dataloaders`, the `validation_step` - will have an argument `dataset_idx` which matches the order here. + .. note:: If you don't need a validation dataset and a validation_step, you don't need to implement this method. + + .. note:: If you want to change the data during every epoch DON'T use the data_loader decorator. + + .. note:: In the case where you return multiple `val_dataloaders`, the `validation_step` + will have an argument `dataset_idx` which matches the order here. """ return None From 94018d49f2495317ce2297f2712eeabc10f06d12 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:43:44 -0500 Subject: [PATCH 060/155] finished lightning module --- pytorch_lightning/core/lightning.py | 30 +++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 273c3ffe52a67..30e4a7d66cf8b 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -250,12 +250,19 @@ def training_step(self, batch, batch_idx, hiddens): pass def validation_step(self, *args, **kwargs): - """return whatever outputs will need to be aggregated in validation_end + r""" - :param batch: The output of your dataloader. A tensor, tuple or list - :param int batch_idx: Integer displaying which batch this is - :param int dataloader_idx: Integer displaying which dataloader this is (only if multiple val datasets used) - :return dict: Dict or OrderedDict - passed to the validation_end step + This is the validation loop. It is called for each batch of the validation set. + Whatever is returned from here will be passed in as a list on validation_end. + In this step you'd normally generate examples or calculate anything of interest such as accuracy. + + Args: + batch (torch.nn.Tensor | (Tensor, Tensor) | [Tensor, Tensor]): The output of your dataloader. A tensor, tuple or list + batch_idx (int): The index of this batch + dataloader_idx (int): The index of the dataloader that produced this batch (only if multiple val datasets used) + + Return: + Dict or OrderedDict - passed to the validation_end step .. code-block:: python @@ -265,14 +272,6 @@ def validation_step(self, batch, batch_idx) # if you have multiple val dataloaders: def validation_step(self, batch, batch_idx, dataloader_idxdx) - If you don't need to validate you don't need to implement this method. - In this step you'd normally generate examples or calculate anything of interest such as accuracy. - - When the validation_step is called, the model has been put in eval mode and PyTorch gradients - have been disabled. At the end of validation, model goes back to training mode and gradients are enabled. 
- - The dict you return here will be available in the `validation_end` method. - Example ------- @@ -314,7 +313,10 @@ def validation_step(self, batch, batch_idx): def validation_step(self, batch, batch_idx, dataset_idx): # dataset_idx tells you which dataset this is. - The `dataset_idx` corresponds to the order of datasets returned in `val_dataloader`. + .. note:: If you don't need to validate you don't need to implement this method. + + .. note:: When the validation_step is called, the model has been put in eval mode and PyTorch gradients + have been disabled. At the end of validation, model goes back to training mode and gradients are enabled. """ pass From 070d008bd0460113aed64c8454506db54c22ceea Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:50:05 -0500 Subject: [PATCH 061/155] finished lightning module --- pytorch_lightning/callbacks/pt_callbacks.py | 48 +++++++++++---------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index 55a41e7e2aa44..117c47060b097 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ b/pytorch_lightning/callbacks/pt_callbacks.py @@ -30,15 +30,16 @@ def set_model(self, model): self.model = model def on_epoch_begin(self, epoch, logs=None): - r""" + """ called when the epoch begins - + Args: epoch (int): current epoch logs (dict): key-value pairs of quantities to monitor - + Example: - >>> on_epoch_begin(epoch=2, logs={'val_loss': 0.2}) + + on_epoch_begin(epoch=2, logs={'val_loss': 0.2}) """ pass @@ -46,7 +47,7 @@ def on_epoch_end(self, epoch, logs=None): pass def on_batch_begin(self, batch, logs=None): - r""" + """ called when the batch starts. Args: @@ -168,10 +169,11 @@ class ModelCheckpoint(Callback): filepath (str): path to save the model file. Can contain named formatting options to be auto-filled. - Example: - >>> # save epoch and val_loss in name - >>> ModelCheckpoint(filepath='{epoch:02d}-{val_loss:.2f}.hdf5') - >>> # saves file like: /path/epoch_2-val_loss_0.2.hdf5 + Example:: + + # save epoch and val_loss in name + ModelCheckpoint(filepath='{epoch:02d}-{val_loss:.2f}.hdf5') + # saves file like: /path/epoch_2-val_loss_0.2.hdf5 monitor (str): quantity to monitor. verbose (bool): verbosity mode, 0 or 1. save_top_k (int): if `save_top_k == k`, @@ -196,14 +198,15 @@ class ModelCheckpoint(Callback): is saved (`model.save(filepath)`). period (int): Interval (number of epochs) between checkpoints. 
- Example: - >>> from pytorch_lightning import Trainer - >>> from pytorch_lightning.callbacks import ModelCheckpoint - >>> - >>> checkpoint_callback = ModelCheckpoint(filepath='my_path') - >>> Trainer(checkpoint_callback=checkpoint_callback) + Example:: + + from pytorch_lightning import Trainer + from pytorch_lightning.callbacks import ModelCheckpoint + + checkpoint_callback = ModelCheckpoint(filepath='my_path') + Trainer(checkpoint_callback=checkpoint_callback) - >>> # saves checkpoints to my_path whenever 'val_loss' has a new min + # saves checkpoints to my_path whenever 'val_loss' has a new min """ def __init__(self, filepath, monitor='val_loss', verbose=0, @@ -356,12 +359,13 @@ class GradientAccumulationScheduler(Callback): scheduling (dict): scheduling in format {epoch: accumulation_factor} Example: - >>> from pytorch_lightning import Trainer - >>> from pytorch_lightning.callbacks import GradientAccumulationScheduler - >>> - >>> # at epoch 5 start accumulating every 2 batches - >>> accumulator = GradientAccumulationScheduler(scheduling: {5: 2}) - >>> Trainer(accumulate_grad_batches=accumulator) + + from pytorch_lightning import Trainer + from pytorch_lightning.callbacks import GradientAccumulationScheduler + + # at epoch 5 start accumulating every 2 batches + accumulator = GradientAccumulationScheduler(scheduling: {5: 2}) + Trainer(accumulate_grad_batches=accumulator) """ def __init__(self, scheduling: dict): From 00d5285ce41b6965b9fe51479072e4ec78bf682e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:50:54 -0500 Subject: [PATCH 062/155] finished lightning module --- pytorch_lightning/callbacks/pt_callbacks.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index 117c47060b097..72d41abfafdfe 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ b/pytorch_lightning/callbacks/pt_callbacks.py @@ -86,12 +86,14 @@ class EarlyStopping(Callback): monitored has stopped increasing; in `auto` mode, the direction is automatically inferred from the name of the monitored quantity. + Example: - >>> from pytorch_lightning import Trainer - >>> from pytorch_lightning.callbacks import EarlyStopping - >>> - >>> early_stopping = EarlyStopping('val_loss') - >>> Trainer(early_stop_callback=early_stopping) + + from pytorch_lightning import Trainer + from pytorch_lightning.callbacks import EarlyStopping + + early_stopping = EarlyStopping('val_loss') + Trainer(early_stop_callback=early_stopping) """ def __init__(self, monitor='val_loss', From f162fd1e95145bc4ebe17a867dfad0f3179c9e8b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:51:23 -0500 Subject: [PATCH 063/155] finished lightning module --- pytorch_lightning/callbacks/pt_callbacks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index 72d41abfafdfe..4c7d877a85bd6 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ b/pytorch_lightning/callbacks/pt_callbacks.py @@ -87,7 +87,7 @@ class EarlyStopping(Callback): mode, the direction is automatically inferred from the name of the monitored quantity. 
- Example: + Example:: from pytorch_lightning import Trainer from pytorch_lightning.callbacks import EarlyStopping @@ -360,7 +360,7 @@ class GradientAccumulationScheduler(Callback): Args: scheduling (dict): scheduling in format {epoch: accumulation_factor} - Example: + Example:: from pytorch_lightning import Trainer from pytorch_lightning.callbacks import GradientAccumulationScheduler From 938bb4a837a0c98f3fe2ca2e6c7da43908b9716b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:02:25 -0500 Subject: [PATCH 064/155] added callbacks --- pytorch_lightning/logging/__init__.py | 120 +++----------------------- 1 file changed, 12 insertions(+), 108 deletions(-) diff --git a/pytorch_lightning/logging/__init__.py b/pytorch_lightning/logging/__init__.py index 9a588bacb87fd..72eccf3dfd73e 100644 --- a/pytorch_lightning/logging/__init__.py +++ b/pytorch_lightning/logging/__init__.py @@ -1,36 +1,20 @@ """ -Lighting offers options for logging information about model, gpu usage, etc, - via several different logging frameworks. It also offers printing options for training monitoring. - -**default_save_path** - -Lightning sets a default TestTubeLogger and CheckpointCallback for you which log to -`os.getcwd()` by default. To modify the logging path you can set:: - - Trainer(default_save_path='/your/path/to/save/checkpoints') - - -If you need more custom behavior (different paths for both, different metrics, etc...) - from the logger and the checkpointCallback, pass in your own instances as explained below. - -Setting up logging ------------------- - -The trainer inits a default logger for you (TestTubeLogger). All logs will -go to the current working directory under a folder named `os.getcwd()/lightning_logs`. - -If you want to modify the default logging behavior even more, pass in a logger - (which should inherit from `LightningBaseLogger`). +Lightning supports most popular logging frameworks (Tensorboard, comet, weights and biases, etc...). +To use a logger, simply pass it into the trainer. .. code-block:: python + from pytorch_lightning import logging - my_logger = MyLightningLogger(...) - trainer = Trainer(logger=my_logger) + # lightning uses tensorboard by default + tb_logger = logging.TensorBoardLogger() + trainer = Trainer(logger=tb_logger) + # or choose from any of the others such as MLFlow, Comet, Neptune, Wandb + comet_logger = logging.CometLogger() + trainer = Trainer(logger=comet_logger) -The path in this logger will overwrite `default_save_path`. - -Lightning supports several common experiment tracking frameworks out of the box +.. note:: All loggers log by default to `os.getcwd()`. To change the path without creating a logger set + Trainer(default_save_path='/your/path/to/save/checkpoints') Custom logger ------------- @@ -73,7 +57,7 @@ def finalize(self, status): Using loggers ------------- -You can call the logger anywhere from your LightningModule by doing: +Call the logger anywhere from your LightningModule by doing: .. code-block:: python @@ -83,86 +67,6 @@ def train_step(...): def any_lightning_module_function_or_hook(...): self.logger.experiment.add_histogram(...) - -Display metrics in progress bar -------------------------------- - -.. code-block:: python - - # DEFAULT - trainer = Trainer(show_progress_bar=True) - -Log metric row every k batches ------------------------------- - -Every k batches lightning will make an entry in the metrics log - -.. 
code-block:: python - - # DEFAULT (ie: save a .csv log file every 10 batches) - trainer = Trainer(row_log_interval=10) - -Log GPU memory --------------- - -Logs GPU memory when metrics are logged. - -.. code-block:: python - - # DEFAULT - trainer = Trainer(log_gpu_memory=None) - - # log only the min/max utilization - trainer = Trainer(log_gpu_memory='min_max') - - # log all the GPU memory (if on DDP, logs only that node) - trainer = Trainer(log_gpu_memory='all') - -Process position ----------------- - -When running multiple models on the same machine we want to decide which progress bar to use. - Lightning will stack progress bars according to this value. - -.. code-block:: python - - # DEFAULT - trainer = Trainer(process_position=0) - - # if this is the second model on the node, show the second progress bar below - trainer = Trainer(process_position=1) - - -Save a snapshot of all hyperparameters --------------------------------------- - -Automatically log hyperparameters stored in the `hparams` attribute as an `argparse.Namespace` - -.. code-block:: python - - class MyModel(pl.Lightning): - def __init__(self, hparams): - self.hparams = hparams - - ... - - args = parser.parse_args() - model = MyModel(args) - - logger = TestTubeLogger(...) - t = Trainer(logger=logger) - trainer.fit(model) - -Write logs file to csv every k batches --------------------------------------- - -Every k batches, lightning will write the new logs to disk - -.. code-block:: python - - # DEFAULT (ie: save a .csv log file every 100 batches) - trainer = Trainer(log_save_interval=100) - """ from os import environ From 21f6c8b6e08c6c353319fd4ca06b832caf6655d2 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:06:32 -0500 Subject: [PATCH 065/155] added loggers --- pytorch_lightning/logging/__init__.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pytorch_lightning/logging/__init__.py b/pytorch_lightning/logging/__init__.py index 72eccf3dfd73e..26c13b2da854d 100644 --- a/pytorch_lightning/logging/__init__.py +++ b/pytorch_lightning/logging/__init__.py @@ -67,6 +67,9 @@ def train_step(...): def any_lightning_module_function_or_hook(...): self.logger.experiment.add_histogram(...) 
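For reference, a minimal sketch of wiring one of these loggers into the Trainer (the directory and experiment name below are placeholder values, not part of this patch):

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.logging import TensorBoardLogger

    # placeholder save directory and experiment name
    tb_logger = TensorBoardLogger(save_dir='lightning_logs', name='my_experiment')
    trainer = Trainer(logger=tb_logger)

    # inside any LightningModule method the same logger is available as self.logger,
    # e.g. self.logger.experiment.add_scalar('val_loss', 0.2)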
+ +Supported Loggers +----------------- """ from os import environ @@ -74,18 +77,23 @@ def any_lightning_module_function_or_hook(...): from .base import LightningLoggerBase, rank_zero_only from .tensorboard import TensorBoardLogger +all = [] + try: from .test_tube import TestTubeLogger + all.append('TestTubeLogger') except ImportError: pass try: from .mlflow import MLFlowLogger + all.append('MLFlowLogger') except ImportError: pass try: from .wandb import WandbLogger + all.append('WandbLogger') except ImportError: pass try: @@ -93,10 +101,14 @@ def any_lightning_module_function_or_hook(...): environ["COMET_DISABLE_AUTO_LOGGING"] = "1" from .comet import CometLogger + all.append('CometLogger') except ImportError: del environ["COMET_DISABLE_AUTO_LOGGING"] try: from .neptune import NeptuneLogger + all.append('NeptuneLogger') except ImportError: pass + +__all__ = all From 275ebdcc5102d35f1a5d46ff8c25b16c564cd156 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:19:07 -0500 Subject: [PATCH 066/155] added loggers --- pytorch_lightning/logging/__init__.py | 31 +++++---- pytorch_lightning/logging/comet.py | 81 ++++++++++++++++++------ pytorch_lightning/logging/mlflow.py | 9 +++ pytorch_lightning/logging/neptune.py | 10 +++ pytorch_lightning/logging/tensorboard.py | 11 +++- pytorch_lightning/logging/test_tube.py | 10 +++ pytorch_lightning/logging/wandb.py | 9 +++ 7 files changed, 125 insertions(+), 36 deletions(-) diff --git a/pytorch_lightning/logging/__init__.py b/pytorch_lightning/logging/__init__.py index 26c13b2da854d..be48000dfd52b 100644 --- a/pytorch_lightning/logging/__init__.py +++ b/pytorch_lightning/logging/__init__.py @@ -77,13 +77,16 @@ def any_lightning_module_function_or_hook(...): from .base import LightningLoggerBase, rank_zero_only from .tensorboard import TensorBoardLogger -all = [] +all = ['TensorBoardLogger'] try: - from .test_tube import TestTubeLogger - all.append('TestTubeLogger') + # needed to prevent ImportError and duplicated logs. + environ["COMET_DISABLE_AUTO_LOGGING"] = "1" + + from .comet import CometLogger + all.append('CometLogger') except ImportError: - pass + del environ["COMET_DISABLE_AUTO_LOGGING"] try: from .mlflow import MLFlowLogger @@ -92,23 +95,23 @@ def any_lightning_module_function_or_hook(...): pass try: - from .wandb import WandbLogger - all.append('WandbLogger') + from .neptune import NeptuneLogger + all.append('NeptuneLogger') except ImportError: pass -try: - # needed to prevent ImportError and duplicated logs. - environ["COMET_DISABLE_AUTO_LOGGING"] = "1" - from .comet import CometLogger - all.append('CometLogger') +try: + from .test_tube import TestTubeLogger + all.append('TestTubeLogger') except ImportError: - del environ["COMET_DISABLE_AUTO_LOGGING"] + pass try: - from .neptune import NeptuneLogger - all.append('NeptuneLogger') + from .wandb import WandbLogger + all.append('WandbLogger') except ImportError: pass + + __all__ = all diff --git a/pytorch_lightning/logging/comet.py b/pytorch_lightning/logging/comet.py index 3fe254185b827..81cb894065329 100644 --- a/pytorch_lightning/logging/comet.py +++ b/pytorch_lightning/logging/comet.py @@ -1,12 +1,11 @@ """ -Log using `comet `_ - -Comet logger can be used in either online or offline mode. -To log in online mode, CometLogger requries an API key: - .. 
code-block:: python + # ------------------- + # ONLINE MODE + # ------------------- from pytorch_lightning.logging import CometLogger + # arguments made to CometLogger are passed on to the comet_ml.Experiment class comet_logger = CometLogger( api_key=os.environ["COMET_KEY"], @@ -17,10 +16,9 @@ ) trainer = Trainer(logger=comet_logger) -To log in offline mode, CometLogger requires a path to a local directory: - -.. code-block:: python - + # ------------------- + # OFFLINE MODE + # ------------------- from pytorch_lightning.logging import CometLogger # arguments made to CometLogger are passed on to the comet_ml.Experiment class comet_logger = CometLogger( @@ -71,18 +69,54 @@ def any_lightning_module_function_or_hook(...): class CometLogger(LightningLoggerBase): def __init__(self, api_key=None, save_dir=None, workspace=None, rest_api_key=None, project_name=None, experiment_name=None, **kwargs): - """Initialize a Comet.ml logger. + r""" + + Log using `comet `_. + Requires either an API Key (online mode) or a local directory path (offline mode) - :param str api_key: Required in online mode. API key, found on Comet.ml - :param str save_dir: Required in offline mode. The path for the directory to save local comet logs - :param str workspace: Optional. Name of workspace for this user - :param str project_name: Optional. Send your experiment to a specific project. - Otherwise will be sent to Uncategorized Experiments. - If project name does not already exists Comet.ml will create a new project. - :param str rest_api_key: Optional. Rest API key found in Comet.ml settings. - This is used to determine version number - :param str experiment_name: Optional. String representing the name for this particular experiment on Comet.ml + .. code-block:: python + + # ONLINE MODE + from pytorch_lightning.logging import CometLogger + + # arguments made to CometLogger are passed on to the comet_ml.Experiment class + comet_logger = CometLogger( + api_key=os.environ["COMET_KEY"], + workspace=os.environ["COMET_WORKSPACE"], # Optional + project_name="default_project", # Optional + rest_api_key=os.environ["COMET_REST_KEY"], # Optional + experiment_name="default" # Optional + ) + trainer = Trainer(logger=comet_logger) + + + .. code-block:: python + + # OFFLINE MODE + from pytorch_lightning.logging import CometLogger + + # arguments made to CometLogger are passed on to the comet_ml.Experiment class + comet_logger = CometLogger( + save_dir=".", + workspace=os.environ["COMET_WORKSPACE"], # Optional + project_name="default_project", # Optional + rest_api_key=os.environ["COMET_REST_KEY"], # Optional + experiment_name="default" # Optional + ) + trainer = Trainer(logger=comet_logger) + + Args: + api_key (str): Required in online mode. API key, found on Comet.ml + save_dir (str): Required in offline mode. The path for the directory to save local comet logs + workspace (str): Optional. Name of workspace for this user + project_name (str): Optional. Send your experiment to a specific project. + Otherwise will be sent to Uncategorized Experiments. + If project name does not already exists Comet.ml will create a new project. + rest_api_key (str): Optional. Rest API key found in Comet.ml settings. + This is used to determine version number + experiment_name (str): Optional. 
String representing the name for this particular experiment on Comet.ml + """ super().__init__() self._experiment = None @@ -124,6 +158,15 @@ def __init__(self, api_key=None, save_dir=None, workspace=None, @property def experiment(self): + r""" + + Actual comet object. To use comet features do the following. + + Example:: + + self.logger.experiment.some_comet_function() + + """ if self._experiment is not None: return self._experiment diff --git a/pytorch_lightning/logging/mlflow.py b/pytorch_lightning/logging/mlflow.py index 5769693d6cc57..9a786db270008 100644 --- a/pytorch_lightning/logging/mlflow.py +++ b/pytorch_lightning/logging/mlflow.py @@ -47,6 +47,15 @@ def __init__(self, experiment_name, tracking_uri=None, tags=None): @property def experiment(self): + r""" + + Actual mlflow object. To use mlflow features do the following. + + Example:: + + self.logger.experiment.some_mlflow_function() + + """ return self._mlflow_client @property diff --git a/pytorch_lightning/logging/neptune.py b/pytorch_lightning/logging/neptune.py index 7ce9f1f1f1e5a..6f4a1281e15ff 100644 --- a/pytorch_lightning/logging/neptune.py +++ b/pytorch_lightning/logging/neptune.py @@ -111,6 +111,16 @@ def __init__(self, api_key=None, project_name=None, offline_mode=False, @property def experiment(self): + r""" + + Actual neptune object. To use neptune features do the following. + + Example:: + + self.logger.experiment.some_neptune_function() + + """ + if self._experiment is not None: return self._experiment else: diff --git a/pytorch_lightning/logging/tensorboard.py b/pytorch_lightning/logging/tensorboard.py index 85279ce728d7b..e5f0d2e252f15 100644 --- a/pytorch_lightning/logging/tensorboard.py +++ b/pytorch_lightning/logging/tensorboard.py @@ -45,10 +45,15 @@ def __init__(self, save_dir, name="default", version=None, **kwargs): @property def experiment(self): - """The underlying :class:`torch.utils.tensorboard.SummaryWriter`. + r""" - :rtype: torch.utils.tensorboard.SummaryWriter - """ + Actual tensorboard object. To use tensorboard features do the following. + + Example:: + + self.logger.experiment.some_tensorboard_function() + + """ if self._experiment is not None: return self._experiment diff --git a/pytorch_lightning/logging/test_tube.py b/pytorch_lightning/logging/test_tube.py index 73da794f7119e..6144368f7c1b2 100644 --- a/pytorch_lightning/logging/test_tube.py +++ b/pytorch_lightning/logging/test_tube.py @@ -55,6 +55,16 @@ def __init__( @property def experiment(self): + r""" + + Actual test-tube object. To use test-tube features do the following. + + Example:: + + self.logger.experiment.some_test_tube_function() + + """ + if self._experiment is not None: return self._experiment diff --git a/pytorch_lightning/logging/wandb.py b/pytorch_lightning/logging/wandb.py index 6846f868f650b..d076aad2ad080 100644 --- a/pytorch_lightning/logging/wandb.py +++ b/pytorch_lightning/logging/wandb.py @@ -68,6 +68,15 @@ def __getstate__(self): @property def experiment(self): + r""" + + Actual wandb object. To use wandb features do the following. 
+ + Example:: + + self.logger.experiment.some_wandb_function() + + """ if self._experiment is None: if self._offline: os.environ["WANDB_MODE"] = "dryrun" From 67a8644c9663356ee1a0145e5bc3e1a4cf727be9 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:19:40 -0500 Subject: [PATCH 067/155] added loggers --- pytorch_lightning/logging/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/logging/__init__.py b/pytorch_lightning/logging/__init__.py index be48000dfd52b..1bb34759e0b12 100644 --- a/pytorch_lightning/logging/__init__.py +++ b/pytorch_lightning/logging/__init__.py @@ -77,7 +77,7 @@ def any_lightning_module_function_or_hook(...): from .base import LightningLoggerBase, rank_zero_only from .tensorboard import TensorBoardLogger -all = ['TensorBoardLogger'] +all = [] try: # needed to prevent ImportError and duplicated logs. @@ -100,6 +100,8 @@ def any_lightning_module_function_or_hook(...): except ImportError: pass +all.append('TensorBoardLogger') + try: from .test_tube import TestTubeLogger all.append('TestTubeLogger') From ae3dbf556df1118d2a0d4bdeaeb6e8c4ef6c5a00 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:20:52 -0500 Subject: [PATCH 068/155] added loggers --- pytorch_lightning/logging/comet.py | 47 ------------------------------ 1 file changed, 47 deletions(-) diff --git a/pytorch_lightning/logging/comet.py b/pytorch_lightning/logging/comet.py index 81cb894065329..fbf4f839cda7a 100644 --- a/pytorch_lightning/logging/comet.py +++ b/pytorch_lightning/logging/comet.py @@ -1,50 +1,3 @@ -""" -.. code-block:: python - - # ------------------- - # ONLINE MODE - # ------------------- - from pytorch_lightning.logging import CometLogger - - # arguments made to CometLogger are passed on to the comet_ml.Experiment class - comet_logger = CometLogger( - api_key=os.environ["COMET_KEY"], - workspace=os.environ["COMET_WORKSPACE"], # Optional - project_name="default_project", # Optional - rest_api_key=os.environ["COMET_REST_KEY"], # Optional - experiment_name="default" # Optional - ) - trainer = Trainer(logger=comet_logger) - - # ------------------- - # OFFLINE MODE - # ------------------- - from pytorch_lightning.logging import CometLogger - # arguments made to CometLogger are passed on to the comet_ml.Experiment class - comet_logger = CometLogger( - save_dir=".", - workspace=os.environ["COMET_WORKSPACE"], # Optional - project_name="default_project", # Optional - rest_api_key=os.environ["COMET_REST_KEY"], # Optional - experiment_name="default" # Optional - ) - trainer = Trainer(logger=comet_logger) - - -Use the logger anywhere in you LightningModule as follows: - -.. code-block:: python - - def train_step(...): - # example - self.logger.experiment.whatever_comet_ml_supports(...) - - def any_lightning_module_function_or_hook(...): - self.logger.experiment.whatever_comet_ml_supports(...) 
- - -""" - from logging import getLogger try: From 83a233e69197aac9ccf1292da5b074ec75be9b1a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:24:03 -0500 Subject: [PATCH 069/155] added loggers --- pytorch_lightning/logging/mlflow.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pytorch_lightning/logging/mlflow.py b/pytorch_lightning/logging/mlflow.py index 9a786db270008..6dd77d5dc8f06 100644 --- a/pytorch_lightning/logging/mlflow.py +++ b/pytorch_lightning/logging/mlflow.py @@ -39,6 +39,15 @@ def any_lightning_module_function_or_hook(...): class MLFlowLogger(LightningLoggerBase): def __init__(self, experiment_name, tracking_uri=None, tags=None): + r""" + + Logs using MLFlow + + Args: + experiment_name (str): The name of the experiment + tracking_uri (str): where this should track + tags (dict): ? + """ super().__init__() self._mlflow_client = mlflow.tracking.MlflowClient(tracking_uri) self.experiment_name = experiment_name From 48d1df042d8bcbbbf589a49620e3a2b456106f94 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:24:41 -0500 Subject: [PATCH 070/155] added loggers --- pytorch_lightning/logging/mlflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/logging/mlflow.py b/pytorch_lightning/logging/mlflow.py index 6dd77d5dc8f06..50f4843e0f6c9 100644 --- a/pytorch_lightning/logging/mlflow.py +++ b/pytorch_lightning/logging/mlflow.py @@ -46,7 +46,7 @@ def __init__(self, experiment_name, tracking_uri=None, tags=None): Args: experiment_name (str): The name of the experiment tracking_uri (str): where this should track - tags (dict): ? + tags (dict): todo this param """ super().__init__() self._mlflow_client = mlflow.tracking.MlflowClient(tracking_uri) From f82def41f24cf76baa604d3a91beba9fe17d63ee Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:29:13 -0500 Subject: [PATCH 071/155] added loggers --- pytorch_lightning/logging/neptune.py | 89 +++++++++++++++++++--------- 1 file changed, 61 insertions(+), 28 deletions(-) diff --git a/pytorch_lightning/logging/neptune.py b/pytorch_lightning/logging/neptune.py index 6f4a1281e15ff..1b42a1358ffb4 100644 --- a/pytorch_lightning/logging/neptune.py +++ b/pytorch_lightning/logging/neptune.py @@ -57,34 +57,67 @@ class NeptuneLogger(LightningLoggerBase): def __init__(self, api_key=None, project_name=None, offline_mode=False, experiment_name=None, upload_source_files=None, params=None, properties=None, tags=None, **kwargs): - """Initialize a neptune.ml logger. - Requires either an API Key (online mode) or a local directory path (offline mode) - - :param str|None api_key: Required in online mode. Neputne API token, found on https://neptune.ml. - Read how to get your API key https://docs.neptune.ml/python-api/tutorials/get-started.html#copy-api-token. - :param str project_name: Required in online mode. Qualified name of a project in a form of - "namespace/project_name" for example "tom/minst-classification". - If None, the value of NEPTUNE_PROJECT environment variable will be taken. - You need to create the project in https://neptune.ml first. - :param bool offline_mode: Optional default False. If offline_mode=True no logs will be send to neptune. - Usually used for debug purposes. - :param str|None experiment_name: Optional. Editable name of the experiment. - Name is displayed in the experiment’s Details (Metadata section) and in experiments view as a column. - :param list|None upload_source_files: Optional. List of source files to be uploaded. 
- Must be list of str or single str. Uploaded sources are displayed in the experiment’s Source code tab. - If None is passed, Python file from which experiment was created will be uploaded. - Pass empty list ([]) to upload no files. Unix style pathname pattern expansion is supported. - For example, you can pass '*.py' to upload all python source files from the current directory. - For recursion lookup use '**/*.py' (for Python 3.5 and later). For more information see glob library. - :param dict|None params: Optional. Parameters of the experiment. After experiment creation params are read-only. - Parameters are displayed in the experiment’s Parameters section and each key-value pair can be - viewed in experiments view as a column. - :param dict|None properties: Optional default is {}. Properties of the experiment. - They are editable after experiment is created. Properties are displayed in the experiment’s Details and - each key-value pair can be viewed in experiments view as a column. - :param list|None tags: Optional default []. Must be list of str. Tags of the experiment. - They are editable after experiment is created (see: append_tag() and remove_tag()). - Tags are displayed in the experiment’s Details and can be viewed in experiments view as a column. + r""" + + Initialize a neptune.ml logger. + + .. note:: Requires either an API Key (online mode) or a local directory path (offline mode) + + .. code-block:: python + + # ONLINE MODE + from pytorch_lightning.logging import NeptuneLogger + # arguments made to NeptuneLogger are passed on to the neptune.experiments.Experiment class + + neptune_logger = NeptuneLogger( + api_key=os.environ["NEPTUNE_API_TOKEN"], + project_name="USER_NAME/PROJECT_NAME", + experiment_name="default", # Optional, + params={"max_epochs": 10}, # Optional, + tags=["pytorch-lightning","mlp"] # Optional, + ) + trainer = Trainer(max_epochs=10, logger=neptune_logger) + + .. code-block:: python + + # OFFLINE MODE + from pytorch_lightning.logging import NeptuneLogger + # arguments made to NeptuneLogger are passed on to the neptune.experiments.Experiment class + + neptune_logger = NeptuneLogger( + project_name="USER_NAME/PROJECT_NAME", + experiment_name="default", # Optional, + params={"max_epochs": 10}, # Optional, + tags=["pytorch-lightning","mlp"] # Optional, + ) + trainer = Trainer(max_epochs=10, logger=neptune_logger) + + Args: + api_key (str|None): Required in online mode. Neputne API token, found on https://neptune.ml. + Read how to get your API key https://docs.neptune.ml/python-api/tutorials/get-started.html#copy-api-token. + project_name (str): Required in online mode. Qualified name of a project in a form of + "namespace/project_name" for example "tom/minst-classification". + If None, the value of NEPTUNE_PROJECT environment variable will be taken. + You need to create the project in https://neptune.ml first. + offline_mode (bool): Optional default False. If offline_mode=True no logs will be send to neptune. + Usually used for debug purposes. + experiment_name (str|None): Optional. Editable name of the experiment. + Name is displayed in the experiment’s Details (Metadata section) and in experiments view as a column. + upload_source_files (list|None): Optional. List of source files to be uploaded. + Must be list of str or single str. Uploaded sources are displayed in the experiment’s Source code tab. + If None is passed, Python file from which experiment was created will be uploaded. + Pass empty list ([]) to upload no files. 
Unix style pathname pattern expansion is supported. + For example, you can pass '*.py' to upload all python source files from the current directory. + For recursion lookup use '**/*.py' (for Python 3.5 and later). For more information see glob library. + params (dict|None): Optional. Parameters of the experiment. After experiment creation params are read-only. + Parameters are displayed in the experiment’s Parameters section and each key-value pair can be + viewed in experiments view as a column. + properties (dict|None): Optional default is {}. Properties of the experiment. + They are editable after experiment is created. Properties are displayed in the experiment’s Details and + each key-value pair can be viewed in experiments view as a column. + tags (list|None): Optional default []. Must be list of str. Tags of the experiment. + They are editable after experiment is created (see: append_tag() and remove_tag()). + Tags are displayed in the experiment’s Details and can be viewed in experiments view as a column. """ super().__init__() self.api_key = api_key From 438708c33d741779cfa6ad8603370a552d11512b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:35:02 -0500 Subject: [PATCH 072/155] added loggers --- pytorch_lightning/logging/neptune.py | 2 +- pytorch_lightning/logging/tensorboard.py | 16 ++++++++------ pytorch_lightning/logging/test_tube.py | 27 ++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/pytorch_lightning/logging/neptune.py b/pytorch_lightning/logging/neptune.py index 1b42a1358ffb4..c6743f4989f8f 100644 --- a/pytorch_lightning/logging/neptune.py +++ b/pytorch_lightning/logging/neptune.py @@ -93,7 +93,7 @@ def __init__(self, api_key=None, project_name=None, offline_mode=False, trainer = Trainer(max_epochs=10, logger=neptune_logger) Args: - api_key (str|None): Required in online mode. Neputne API token, found on https://neptune.ml. + api_key (str | None): Required in online mode. Neputne API token, found on https://neptune.ml. Read how to get your API key https://docs.neptune.ml/python-api/tutorials/get-started.html#copy-api-token. project_name (str): Required in online mode. Qualified name of a project in a form of "namespace/project_name" for example "tom/minst-classification". diff --git a/pytorch_lightning/logging/tensorboard.py b/pytorch_lightning/logging/tensorboard.py index e5f0d2e252f15..937fc3e6fd37b 100644 --- a/pytorch_lightning/logging/tensorboard.py +++ b/pytorch_lightning/logging/tensorboard.py @@ -11,12 +11,15 @@ class TensorBoardLogger(LightningLoggerBase): - r"""Log to local file system in TensorBoard format + r""" + + Log to local file system in TensorBoard format Implemented using :class:`torch.utils.tensorboard.SummaryWriter`. Logs are saved to `os.path.join(save_dir, name, version)` - :example: + Example + -------- .. code-block:: python @@ -24,11 +27,12 @@ class TensorBoardLogger(LightningLoggerBase): trainer = Trainer(logger=logger) trainer.train(model) - :param str save_dir: Save directory - :param str name: Experiment name. Defaults to "default". - :param int version: Experiment version. If version is not specified the logger inspects the save + Args: + save_dir (str): Save directory + name (str): Experiment name. Defaults to "default". + version (int): Experiment version. If version is not specified the logger inspects the save directory for existing versions, then automatically assigns the next available version. 
- :param \**kwargs: Other arguments are passed directly to the :class:`SummaryWriter` constructor. + \**kwargs (dict): Other arguments are passed directly to the :class:`SummaryWriter` constructor. """ NAME_CSV_TAGS = 'meta_tags.csv' diff --git a/pytorch_lightning/logging/test_tube.py b/pytorch_lightning/logging/test_tube.py index 6144368f7c1b2..ccb50ef3ec700 100644 --- a/pytorch_lightning/logging/test_tube.py +++ b/pytorch_lightning/logging/test_tube.py @@ -38,6 +38,33 @@ def any_lightning_module_function_or_hook(...): class TestTubeLogger(LightningLoggerBase): + r""" + + Log to local file system in TensorBoard format but using a nicer folder structure. + + Implemented using :class:`torch.utils.tensorboard.SummaryWriter`. Logs are saved to + `os.path.join(save_dir, name, version)` + + Example + -------- + + .. code-block:: python + + logger = TestTubeLogger("tt_logs", name="my_exp_name") + trainer = Trainer(logger=logger) + trainer.train(model) + + Args: + save_dir (str): Save directory + name (str): Experiment name. Defaults to "default". + description (str): A short snippet about this experiment + debug (bool): If True, it doesn't log anything + version (int): Experiment version. If version is not specified the logger inspects the save + directory for existing versions, then automatically assigns the next available version. + create_git_tag (bool): If True creates a git tag to save the code used in this experiment + + """ + __test__ = False def __init__( From c3d3c47554b8c2cdd725ca45a388b5489b3e4158 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:36:05 -0500 Subject: [PATCH 073/155] set auto dp if no backend --- pytorch_lightning/logging/wandb.py | 34 +++++++++--------------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/pytorch_lightning/logging/wandb.py b/pytorch_lightning/logging/wandb.py index d076aad2ad080..829006d85d0dd 100644 --- a/pytorch_lightning/logging/wandb.py +++ b/pytorch_lightning/logging/wandb.py @@ -1,27 +1,3 @@ -""" -Log using `W&B `_ - -.. code-block:: python - - >>> from pytorch_lightning.logging import WandbLogger - >>> from pytorch_lightning import Trainer - >>> wandb_logger = WandbLogger() - >>> trainer = Trainer(logger=wandb_logger) - - -Use the logger anywhere in you LightningModule as follows: - -.. code-block:: python - - def train_step(...): - # example - self.logger.experiment.whatever_wandb_supports(...) - - def any_lightning_module_function_or_hook(...): - self.logger.experiment.whatever_wandb_supports(...) - -""" - import os try: @@ -44,6 +20,16 @@ class WandbLogger(LightningLoggerBase): anonymous (bool): enables or explicitly disables anonymous logging. project (str): the name of the project to which this run will belong. tags (list of str): tags associated with this run. + + Example + -------- + .. 
code-block:: python + + from pytorch_lightning.logging import WandbLogger + from pytorch_lightning import Trainer + + wandb_logger = WandbLogger() + trainer = Trainer(logger=wandb_logger) """ def __init__(self, name=None, save_dir=None, offline=False, id=None, anonymous=False, From 6c0debf1e3055c9f8a91da284fccb192933a180f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:37:43 -0500 Subject: [PATCH 074/155] added loggers --- docs/source/new-project.rst | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/source/new-project.rst b/docs/source/new-project.rst index 62de6879ae0b0..35834518355d8 100644 --- a/docs/source/new-project.rst +++ b/docs/source/new-project.rst @@ -68,8 +68,5 @@ Then you could do rapid research by switching between these two and using the sa 1. You're writing pure PyTorch... no unnecessary abstractions or new libraries to learn. 2. You get free GPU and 16-bit support without writing any of that code in your model. -3. You also get all of the capabilities below (without coding or testing yourself). +3. You also get early stopping, multi-gpu training, 16-bit and MUCH more without coding anything! -- :ref:`Callbacks` -- :ref:`Examples & Tutorials` -- :ref:`Examples & Tutorials` From a66f56abf2878391175bd569aad187e9df2f2799 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:39:09 -0500 Subject: [PATCH 075/155] added loggers --- docs/source/index.rst | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index c219884b353c8..c1d2463116494 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -12,7 +12,6 @@ PyTorch-Lightning Documentation :caption: Start Here new-project - examples .. toctree:: :maxdepth: 4 @@ -24,6 +23,21 @@ PyTorch-Lightning Documentation logging trainer +.. toctree:: + :maxdepth: 1 + :name: Examples + :caption: Examples + + examples + +.. toctree:: + :maxdepth: 1 + :name: Tutorials + :caption: Tutorials + + examples + + .. toctree:: :maxdepth: 1 :name: community From 8bff7e38da51dd4db60b89f4de0a6882dc926e1b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:41:11 -0500 Subject: [PATCH 076/155] added loggers --- docs/source/index.rst | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index c1d2463116494..5c97c9e76613b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -35,7 +35,14 @@ PyTorch-Lightning Documentation :name: Tutorials :caption: Tutorials - examples + tutorials + +.. toctree:: + :maxdepth: 1 + :name: Common Use Cases + :caption: Common Use Cases + + common-cases .. toctree:: From 1e2a37d6d44b1aac326c3e6a94829c4019f5d16f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:42:12 -0500 Subject: [PATCH 077/155] added loggers --- docs/source/examples.rst | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/source/examples.rst b/docs/source/examples.rst index 3f15f06e4e99d..a9158ae422330 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -1,8 +1,14 @@ -Examples & Tutorials +Examples ==================== +MNIST +----- +Some MNIST example -.. 
toctree:: - :maxdepth: 3 +GAN +---- +GAN example - pl_examples \ No newline at end of file +BERT +---- +BERT example From 22b203d71b13a2e757c748207dba8c53f777385e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:42:24 -0500 Subject: [PATCH 078/155] added loggers --- docs/source/examples.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/examples.rst b/docs/source/examples.rst index a9158ae422330..92fa0b8004d3f 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -9,6 +9,6 @@ GAN ---- GAN example -BERT ----- +BERT Finetuning +---------------- BERT example From f02d0bcbb9e9f7391bd547140a62327c977aaec5 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 18:08:04 -0500 Subject: [PATCH 079/155] added loggers --- docs/source/examples.rst | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/docs/source/examples.rst b/docs/source/examples.rst index 92fa0b8004d3f..a734243d5d41b 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -1,14 +1,34 @@ -Examples -==================== +GAN +==== +.. toctree:: + :maxdepth: 3 + + pl_examples.domain_templates.gan MNIST ------ -Some MNIST example +==== +.. toctree:: + :maxdepth: 3 -GAN ----- -GAN example + pl_examples.basic_examples.lightning_module_template + +Multi-node (ddp) MNIST +==== +.. toctree:: + :maxdepth: 3 + + pl_examples.multi_node_examples.multi_node_ddp_demo + +Multi-node (ddp2) MNIST +==== +.. toctree:: + :maxdepth: 3 + + pl_examples.multi_node_examples.multi_node_ddp2_demo + +Imagenet +==== +.. toctree:: + :maxdepth: 3 -BERT Finetuning ----------------- -BERT example + pl_examples.full_examples.imagenet.imagenet_example From 57db191028db2d5ec201cce99cbbbfbac59c03a3 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 17 Jan 2020 05:03:31 -0500 Subject: [PATCH 080/155] flake 8 --- pytorch_lightning/core/__init__.py | 3 +- pytorch_lightning/core/lightning.py | 6 +- pytorch_lightning/logging/__init__.py | 2 - pytorch_lightning/logging/neptune.py | 3 +- pytorch_lightning/trainer/__init__.py | 6 +- pytorch_lightning/trainer/trainer.py | 81 ++++++++++++++------------- tests/test_trainer.py | 2 +- 7 files changed, 53 insertions(+), 50 deletions(-) diff --git a/pytorch_lightning/core/__init__.py b/pytorch_lightning/core/__init__.py index 8a4223448f48a..17d7619de3663 100644 --- a/pytorch_lightning/core/__init__.py +++ b/pytorch_lightning/core/__init__.py @@ -90,7 +90,8 @@ def test_dataloader(self): trainer.fit(model) -Check out this `COLAB `_ +Check out this +`COLAB `_ for a live demo. """ diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 30e4a7d66cf8b..e1a328e48cf8b 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -257,9 +257,11 @@ def validation_step(self, *args, **kwargs): In this step you'd normally generate examples or calculate anything of interest such as accuracy. Args: - batch (torch.nn.Tensor | (Tensor, Tensor) | [Tensor, Tensor]): The output of your dataloader. A tensor, tuple or list + batch (torch.nn.Tensor | (Tensor, Tensor) | [Tensor, Tensor]): The output of your dataloader. 
+ A tensor, tuple or list batch_idx (int): The index of this batch - dataloader_idx (int): The index of the dataloader that produced this batch (only if multiple val datasets used) + dataloader_idx (int): The index of the dataloader that produced this batch (only if multiple + val datasets used) Return: Dict or OrderedDict - passed to the validation_end step diff --git a/pytorch_lightning/logging/__init__.py b/pytorch_lightning/logging/__init__.py index 1bb34759e0b12..5fbb93cddc14d 100644 --- a/pytorch_lightning/logging/__init__.py +++ b/pytorch_lightning/logging/__init__.py @@ -114,6 +114,4 @@ def any_lightning_module_function_or_hook(...): except ImportError: pass - - __all__ = all diff --git a/pytorch_lightning/logging/neptune.py b/pytorch_lightning/logging/neptune.py index c6743f4989f8f..7c677962df70e 100644 --- a/pytorch_lightning/logging/neptune.py +++ b/pytorch_lightning/logging/neptune.py @@ -94,7 +94,8 @@ def __init__(self, api_key=None, project_name=None, offline_mode=False, Args: api_key (str | None): Required in online mode. Neputne API token, found on https://neptune.ml. - Read how to get your API key https://docs.neptune.ml/python-api/tutorials/get-started.html#copy-api-token. + Read how to get your API key + https://docs.neptune.ml/python-api/tutorials/get-started.html#copy-api-token. project_name (str): Required in online mode. Qualified name of a project in a form of "namespace/project_name" for example "tom/minst-classification". If None, the value of NEPTUNE_PROJECT environment variable will be taken. diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 893b5cbe1d2ca..98c2b99b56357 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -1,7 +1,7 @@ """ The trainer de-couples the engineering code (16-bit, early stopping, GPU distribution, etc...) from the -science code (GAN, BERT, your project, etc...). It uses many assumptions which are best practices in +science code (GAN, BERT, your project, etc...). It uses many assumptions which are best practices in AI research today. The trainer automates all parts of training except: @@ -9,9 +9,9 @@ - what happens in training , test, val loop - where the data come from - which optimizers to use -- how to do the computations +- how to do the computations -The Trainer delegates those calls to your LightningModule which defines how to do those parts. +The Trainer delegates those calls to your LightningModule which defines how to do those parts. This is the basic use of the trainer: diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 565754f416c88..dd68293a1cf36 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -108,7 +108,7 @@ def __init__( Trainer(logger=logger) checkpoint_callback (:class:`CheckpointCallback`): Callback for checkpointing. 
Example:: - from pytorch_lightning.callbacks import ModelCheckpoint + from pytorch_lightning.callbacks import ModelCheckpoint # default used by the Trainer checkpoint_callback = ModelCheckpoint( @@ -123,7 +123,7 @@ def __init__( trainer = Trainer(checkpoint_callback=checkpoint_callback) early_stop_callback (:class:`.EarlyStopping`): Callback for early stopping Example:: - from pytorch_lightning.callbacks import EarlyStopping + from pytorch_lightning.callbacks import EarlyStopping # default used by the Trainer early_stop_callback = EarlyStopping( @@ -158,11 +158,11 @@ def __init__( # to train on 8 nodes trainer = Trainer(num_nodes=8) - + nb_gpu_nodes (int): .. deprecated:: 0.5.0 Use `num_nodes` instead. Will remove 0.8.0. - + gpus (list|str|int): Which GPUs to train on. Example:: # default used by the Trainer (ie: train on CPU) @@ -206,7 +206,7 @@ def __init__( # use only 1% of the train, test, val datasets trainer = Trainer(overfit_pct=0.01) - + track_grad_norm (int): -1 no tracking. Otherwise tracks that norm Example:: # default used by the Trainer @@ -230,7 +230,7 @@ def __init__( # runs 1 train, val, test batch and program ends trainer = Trainer(fast_dev_run=True) - + accumulate_grad_batches (int|dict): Accumulates grads every k batches or as set up in the dict. Example:: # default used by the Trainer (no accumulation) @@ -246,7 +246,7 @@ def __init__( Example:: # default used by the Trainer trainer = Trainer(max_epochs=1000) - + max_nb_epochs (int): .. deprecated:: 0.5.0 Use `max_epochs` instead. Will remove 0.8.0. @@ -255,12 +255,12 @@ def __init__( Example:: # default used by the Trainer trainer = Trainer(min_epochs=1) - + min_nb_epochs (int): .. deprecated:: 0.5.0 Use `min_nb_epochs` instead. Will remove 0.8.0. - train_percent_check (int): How much of training dataset to check. + train_percent_check (int): How much of training dataset to check. Useful when debugging or testing something that happens at the end of an epoch. Example:: # default used by the Trainer @@ -269,7 +269,7 @@ def __init__( # run through only 25% of the training set each epoch trainer = Trainer(train_percent_check=0.25) - val_percent_check (int): How much of validation dataset to check. + val_percent_check (int): How much of validation dataset to check. Useful when debugging or testing something that happens at the end of an epoch. Example:: # default used by the Trainer @@ -278,7 +278,7 @@ def __init__( # run through only 25% of the validation set each epoch trainer = Trainer(val_percent_check=0.25) - test_percent_check (int): How much of test dataset to check. + test_percent_check (int): How much of test dataset to check. Useful when debugging or testing something that happens at the end of an epoch. Example:: # default used by the Trainer @@ -305,12 +305,12 @@ def __init__( Example:: # default used by the Trainer trainer = Trainer(log_save_interval=100) - + row_log_interval (int): How often to add logging rows (does not write to disk) Example:: # default used by the Trainer trainer = Trainer(row_log_interval=10) - + add_row_log_interval (int): .. deprecated:: 0.5.0 Use `row_log_interval` instead. Will remove 0.8.0. @@ -324,8 +324,8 @@ def __init__( # dp = DataParallel (split a batch onto k gpus on same machine). trainer = Trainer(gpus=2, distributed_backend='dp') - # ddp = DistributedDataParallel - # Each gpu trains by itself on a subset of the data. + # ddp = DistributedDataParallel + # Each gpu trains by itself on a subset of the data. # Gradients sync across all gpus and all machines. 
trainer = Trainer(gpus=2, num_nodes=2, distributed_backend='ddp') @@ -334,29 +334,29 @@ def __init__( # syncs gradients across nodes like ddp # useful for things like increasing the number of negative samples trainer = Trainer(gpus=2, num_nodes=2, distributed_backend='ddp2') - + use_amp (bool): If true uses apex for 16bit precision Example:: # default used by the Trainer trainer = Trainer(use_amp=False) - + print_nan_grads (bool): Prints gradients with nan values Example:: # default used by the Trainer trainer = Trainer(print_nan_grads=False) - + weights_summary (str): Prints a summary of the weights when training begins. Options: 'full', 'top', None. Example:: # default used by the Trainer (ie: print all weights) trainer = Trainer(weights_summary='full') - + # print only the top level modules trainer = Trainer(weights_summary='top') - + # don't print a summary trainer = Trainer(weights_summary=None) - + weights_save_path (str): Where to save weights if specified. Example:: # default used by the Trainer @@ -369,7 +369,7 @@ def __init__( # **NOTE: this saves weights to some/path NOT my/path checkpoint_callback = ModelCheckpoint(filepath='some/path') trainer = Trainer( - checkpoint_callback=checkpoint_callback, + checkpoint_callback=checkpoint_callback, weights_save_path='my/path' ) @@ -378,7 +378,7 @@ def __init__( Example:: # default used by the Trainer trainer = Trainer(amp_level='O1') - + num_sanity_val_steps (int): Sanity check runs n batches of val before starting the training routine. This catches any bugs in your validation without having to wait for the first validation check. The Trainer uses 5 steps by default. Turn it off or modify it here. @@ -392,12 +392,13 @@ def __init__( nb_sanity_val_steps (int): .. deprecated:: 0.5.0 Use `num_sanity_val_steps` instead. Will remove 0.8.0. - - truncated_bptt_steps (int): Truncated back prop breaks performs backprop every k steps of a much longer sequence - If this is enabled, your batches will automatically get truncated - and the trainer will apply Truncated Backprop to it. Make sure your batches have a sequence dimension. - (`Williams et al. "An efficient gradient-based algorithm for on-line training of recurrent network trajectories." - `_) + + truncated_bptt_steps (int): Truncated back prop breaks performs backprop every k steps of + a much longer sequence If this is enabled, your batches will automatically get truncated + and the trainer will apply Truncated Backprop to it. Make sure your batches have a sequence + dimension. (`Williams et al. "An efficient gradient-based algorithm for on-line training of + recurrent network trajectories." + `_) Example:: # default used by the Trainer (ie: disabled) trainer = Trainer(truncated_bptt_steps=None) @@ -413,7 +414,7 @@ def __init__( # resume from a specific checkpoint trainer = Trainer(resume_from_checkpoint='some/path/to/my_checkpoint.ckpt') """ - # + # # .. warning:: Following arguments become deprecated and they will be removed in v0.8.0: # - `nb_sanity_val_steps` @@ -665,13 +666,13 @@ def tng_tqdm_dic(self): def fit(self, model): r""" Runs the full optimization routine. - + Example:: - + trainer = Trainer() model = LightningModule() - - trainer.fit() + + trainer.fit() """ # when using multi-node or DDP within a node start each module in a separate process if self.use_ddp2: @@ -829,27 +830,27 @@ def run_pretrain_routine(self, model): def test(self, model=None): r""" - + Separates from fit to make sure you never run on your test set until you want to. 
- + Args: model (LightningModule): The model to test. Example:: - + # Option 1 # run test after fitting trainer = Trainer() model = LightningModule() - + trainer.fit() trainer.test() - + # Option 2 # run test from a loaded model model = LightningModule.load_from_checkpoint('path/to/checkpoint.ckpt') trainer = Trainer() - + trainer.test(model) """ self.testing = True diff --git a/tests/test_trainer.py b/tests/test_trainer.py index fca75c3fb12b3..2301104531cec 100644 --- a/tests/test_trainer.py +++ b/tests/test_trainer.py @@ -15,7 +15,7 @@ LightningValidationMultipleDataloadersMixin, LightningTestMultipleDataloadersMixin, ) -from pytorch_lightning.core.lightning import load_hparams_from_tags_csv +from pytorch_lightning.core.lightning import load_hparams_from_tags_csv from pytorch_lightning.trainer.logging import TrainerLoggingMixin From c6c4492475e6feecb3dec275e2f235d12cc7419a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 17 Jan 2020 05:31:16 -0500 Subject: [PATCH 081/155] flake 8 --- docs/source/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/index.rst b/docs/source/index.rst index 5c97c9e76613b..234331f1e248e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -53,6 +53,7 @@ PyTorch-Lightning Documentation CODE_OF_CONDUCT.md CONTRIBUTING.md BECOMING_A_CORE_CONTRIBUTOR.md + governance.md Indices and tables From 55062ad92603249bcf343081fd4224f0771bb60c Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 21 Jan 2020 13:20:55 -0500 Subject: [PATCH 082/155] fix docs path --- .gitignore | 3 ++- docs/source/callbacks.rst | 14 ++++++++++++++ docs/source/common-cases.rst | 21 +++++++++++++++++++++ docs/source/lightning-module.rst | 10 ++++++++++ docs/source/logging.rst | 12 ++++++++++++ docs/source/modules.rst | 7 +++++++ docs/source/trainer.rst | 21 +++++++++++++++++++++ docs/source/tutorials.rst | 20 ++++++++++++++++++++ 8 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 docs/source/callbacks.rst create mode 100644 docs/source/common-cases.rst create mode 100644 docs/source/lightning-module.rst create mode 100644 docs/source/logging.rst create mode 100644 docs/source/modules.rst create mode 100644 docs/source/trainer.rst create mode 100644 docs/source/tutorials.rst diff --git a/.gitignore b/.gitignore index 43541c9dcbe80..645f97d708262 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,8 @@ tests/save_dir default/ lightning_logs/ tests/tests/ -*.rst +pytorch_lightning*.rst +pl_examples*.rst /docs/source/*.md # Byte-compiled / optimized / DLL files diff --git a/docs/source/callbacks.rst b/docs/source/callbacks.rst new file mode 100644 index 0000000000000..ae8dd25f1f93b --- /dev/null +++ b/docs/source/callbacks.rst @@ -0,0 +1,14 @@ +.. role:: hidden + :class: hidden-section + +Callbacks +=========== +.. automodule:: pytorch_lightning.callbacks + :exclude-members: + _del_model, + _save_model, + on_epoch_end, + on_train_end, + on_epoch_begin, + check_monitor_top_k, + on_train_begin, \ No newline at end of file diff --git a/docs/source/common-cases.rst b/docs/source/common-cases.rst new file mode 100644 index 0000000000000..7b96a93d84660 --- /dev/null +++ b/docs/source/common-cases.rst @@ -0,0 +1,21 @@ +Multi-gpu (same node) training +============================== + +Multi-node training +==================== + +16-bit precision +================= + +gradient clipping +================= + +modifying training via hooks +============================= + + + +.. 
toctree:: + :maxdepth: 3 + + pl_examples \ No newline at end of file diff --git a/docs/source/lightning-module.rst b/docs/source/lightning-module.rst new file mode 100644 index 0000000000000..93c81537f65ca --- /dev/null +++ b/docs/source/lightning-module.rst @@ -0,0 +1,10 @@ +.. role:: hidden + :class: hidden-section + +LightningModule +=========== +.. automodule:: pytorch_lightning.core + :exclude-members: + _abc_impl, + summarize, + diff --git a/docs/source/logging.rst b/docs/source/logging.rst new file mode 100644 index 0000000000000..24f49f0ab15c0 --- /dev/null +++ b/docs/source/logging.rst @@ -0,0 +1,12 @@ +.. role:: hidden + :class: hidden-section + +Logging +=========== +.. automodule:: pytorch_lightning.logging + :exclude-members: + _abc_impl, + _save_model, + on_epoch_end, + on_train_end, + on_epoch_begin, diff --git a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 0000000000000..e4c5121858c28 --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,7 @@ +pl_examples +=========== + +.. toctree:: + :maxdepth: 4 + + pl_examples diff --git a/docs/source/trainer.rst b/docs/source/trainer.rst new file mode 100644 index 0000000000000..db2657dc2c429 --- /dev/null +++ b/docs/source/trainer.rst @@ -0,0 +1,21 @@ +.. role:: hidden + :class: hidden-section + +Trainer +=========== +.. automodule:: pytorch_lightning.trainer + :members: fit, test + :exclude-members: + run_pretrain_routine, + _abc_impl, + _Trainer__set_root_gpu, + _Trainer__init_optimizers, + _Trainer__parse_gpu_ids, + _Trainer__configure_schedulers, + data_parallel, + num_gpus, + slurm_job_id, + tng_tqdm_dic, + training_tqdm_dict, + init_optimizers, + configure_schedulers diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst new file mode 100644 index 0000000000000..7b66d141ff2f9 --- /dev/null +++ b/docs/source/tutorials.rst @@ -0,0 +1,20 @@ +Refactoring PyTorch into Lightning +================================== +`Tutorial `_ + +Start a research project +========================= +`Research seed `_ + +Basic Lightning use +==================== +`Tutorial `_ + +9 key Lightning tricks +======================== +`Tutorial <9 key speed features in Pytorch-Lightning>`_ + +Multi-node training on SLURM +============================= +`Tutorial `_ + From 8ae41329172871b345c58b3a1d174a08ad043360 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 15:12:54 -0500 Subject: [PATCH 083/155] updated gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index d10d5aba3c082..4a90411d1d01d 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ test_tube_exp/ # Documentations docs/source/pl_examples*.rst docs/source/pytorch_lightning*.rst +tests/tests/ /docs/source/*.md # Byte-compiled / optimized / DLL files From a26c95bbf1805224ff8b69d933e3b341ce6c9bec Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 15:12:54 -0500 Subject: [PATCH 084/155] updated gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 4a90411d1d01d..3df09c9089f79 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ app/models/ pip-wheel-metadata/ lightning_logs/ + # Test-tube test_tube_logs/ test_tube_data/ From ed41c77e7bdeeebe62a6eb8dae4974f8537aa2dc Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 18:35:01 -0500 Subject: [PATCH 085/155] updated links in ninja file --- docs/source/_templates/theme_variables.jinja | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 
deletions(-) diff --git a/docs/source/_templates/theme_variables.jinja b/docs/source/_templates/theme_variables.jinja index b61b79d698bf2..3a67ad64d384d 100644 --- a/docs/source/_templates/theme_variables.jinja +++ b/docs/source/_templates/theme_variables.jinja @@ -2,16 +2,16 @@ 'github': 'https://github.com/PytorchLightning/pytorch-lightning', 'github_issues': 'https://github.com/PytorchLightning/pytorch-lightning/issues', 'contributing': 'https://github.com/PytorchLightning/pytorch-lightning/blob/master/CONTRIBUTING.md', - 'docs': 'https://pytorch-lightning.rtfd.io/en/latest', + 'docs': 'https://pytorchlightning.github.io/pytorch-lightning', 'twitter': 'https://twitter.com/PyTorchLightnin', 'discuss': 'https://discuss.pytorch.org', - 'tutorials': 'https://pytorch-lightning.rtfd.io/en/latest/', - 'previous_pytorch_versions': 'https://pytorch-lightning.rtfd.io/en/latest/', - 'home': 'https://pytorch-lightning.rtfd.io/en/latest/', - 'get_started': 'https://pytorch-lightning.rtfd.io/en/latest/', - 'features': 'https://pytorch-lightning.rtfd.io/en/latest/', - 'blog': 'https://pytorch-lightning.rtfd.io/en/latest/', - 'resources': 'https://pytorch-lightning.rtfd.io/en/latest/', - 'support': 'https://pytorch-lightning.rtfd.io/en/latest/', + 'tutorials': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'previous_pytorch_versions': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'home': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'get_started': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'features': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'blog': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'resources': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'support': 'https://pytorchlightning.github.io/pytorch-lightning/', } -%} From 839c9da5b52a014c2b846e211b66d108e28bf375 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 19:44:02 -0500 Subject: [PATCH 086/155] updated docs --- docs/source/index.rst | 1 - docs/source/new-project.rst | 1 - 2 files changed, 2 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 234331f1e248e..aa1736e89d81b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -44,7 +44,6 @@ PyTorch-Lightning Documentation common-cases - .. toctree:: :maxdepth: 1 :name: community diff --git a/docs/source/new-project.rst b/docs/source/new-project.rst index 35834518355d8..5c03b9e5c841c 100644 --- a/docs/source/new-project.rst +++ b/docs/source/new-project.rst @@ -69,4 +69,3 @@ Then you could do rapid research by switching between these two and using the sa 1. You're writing pure PyTorch... no unnecessary abstractions or new libraries to learn. 2. You get free GPU and 16-bit support without writing any of that code in your model. 3. You also get early stopping, multi-gpu training, 16-bit and MUCH more without coding anything! 
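As a rough sketch of the free GPU and 16-bit support mentioned above, assuming ``model`` is an already-defined LightningModule and using only the documented ``gpus``, ``distributed_backend`` and ``use_amp`` Trainer arguments (the exact values here are placeholders, not a recommended configuration):

.. code-block:: python

    from pytorch_lightning import Trainer

    # same model, no changes inside it:
    # split each batch across 2 GPUs (DataParallel) and train in 16-bit via apex
    trainer = Trainer(gpus=2, distributed_backend='dp', use_amp=True)
    trainer.fit(model)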
- From 55a305e43c7a77af0434848877e14a7ba0c79ddf Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 20:36:57 -0500 Subject: [PATCH 087/155] finished callbacks --- pytorch_lightning/callbacks/pt_callbacks.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index 42a8336fa7c9f..001981e431cbf 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ b/pytorch_lightning/callbacks/pt_callbacks.py @@ -1,15 +1,13 @@ """ Callbacks -========= - +==================================== Callbacks supported by Lightning """ -import logging import os import shutil +import logging import warnings - import numpy as np from pytorch_lightning.overrides.data_parallel import LightningDistributedDataParallel @@ -165,7 +163,9 @@ def on_train_end(self, logs=None): class ModelCheckpoint(Callback): - r"""Save the model after every epoch. + r""" + + Save the model after every epoch. Args: filepath (str): path to save the model file. @@ -403,4 +403,4 @@ def on_epoch_begin(self, epoch, trainer): # should_stop = c.on_epoch_end(i, logs={'val_loss': loss}) # logging.info(loss) # if should_stop: -# break +# break \ No newline at end of file From 1a0437e410eceff58c2467372381761e39fb8fda Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 20:50:46 -0500 Subject: [PATCH 088/155] finished callbacks --- pytorch_lightning/trainer/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 98c2b99b56357..41c69251a209c 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -27,4 +27,4 @@ from .trainer import Trainer -__all__ = ['Trainer'] +__all__ = ['Trainer'] \ No newline at end of file From 23cb27c887f4cf2e9853afccf4e607634ab9115a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 20:57:04 -0500 Subject: [PATCH 089/155] finished callbacks --- pytorch_lightning/trainer/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 41c69251a209c..b16cbb87f12c0 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -23,6 +23,7 @@ trainer = Trainer() trainer.fit(model) + """ from .trainer import Trainer From b9e0898102f269d8568c08917c3219e858be0964 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:17:16 -0500 Subject: [PATCH 090/155] fixed left menu --- docs/source/index.rst | 2 +- pytorch_lightning/trainer/trainer.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index aa1736e89d81b..2f49871c219d0 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -44,6 +44,7 @@ PyTorch-Lightning Documentation common-cases + .. 
toctree:: :maxdepth: 1 :name: community @@ -61,4 +62,3 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` - diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index cc9f9394f3961..a3cf882991e5f 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1,5 +1,3 @@ - - import logging import os import sys From 805de07345c67a4ddab8b0dceca9b8c276371ec1 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:29:30 -0500 Subject: [PATCH 091/155] added callbacks to menu --- docs/source/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 2f49871c219d0..001ac2cb7af8e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -55,7 +55,6 @@ PyTorch-Lightning Documentation BECOMING_A_CORE_CONTRIBUTOR.md governance.md - Indices and tables ------------------ From 7bb78c991fd5aca296a8ccd04ab3cc44c497a57e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:36:43 -0500 Subject: [PATCH 092/155] added direct links to docs --- docs/source/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/index.rst b/docs/source/index.rst index 001ac2cb7af8e..2f49871c219d0 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -55,6 +55,7 @@ PyTorch-Lightning Documentation BECOMING_A_CORE_CONTRIBUTOR.md governance.md + Indices and tables ------------------ From f52aec9766702dbca156e6c359ed95021322e913 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:43:04 -0500 Subject: [PATCH 093/155] added direct links to docs --- pytorch_lightning/trainer/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index b16cbb87f12c0..41c69251a209c 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -23,7 +23,6 @@ trainer = Trainer() trainer.fit(model) - """ from .trainer import Trainer From 6a2e00bac4f08c8a1bb6775f1188b4bb4feb1e2b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:49:12 -0500 Subject: [PATCH 094/155] added direct links to docs --- pytorch_lightning/trainer/trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index a3cf882991e5f..e89449e8864a8 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -18,6 +18,7 @@ parse_gpu_ids, determine_root_gpu_device ) + from pytorch_lightning.trainer.evaluation_loop import TrainerEvaluationLoopMixin from pytorch_lightning.trainer.logging import TrainerLoggingMixin from pytorch_lightning.trainer.model_hooks import TrainerModelHooksMixin From 179515083eb9d03b3bea5ac9d23e5f243589faeb Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 22:01:11 -0500 Subject: [PATCH 095/155] added direct links to docs --- pytorch_lightning/callbacks/pt_callbacks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index 001981e431cbf..1549f54d653e3 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ b/pytorch_lightning/callbacks/pt_callbacks.py @@ -382,6 +382,7 @@ def __init__(self, scheduling: dict): if minimal_epoch < 1: msg = f"Epochs indexing from 1, epoch {minimal_epoch} cannot be interpreted correct" raise IndexError(msg) + elif minimal_epoch != 1: # if user didnt 
define first epoch accumulation factor scheduling.update({1: 1}) From ce8b7197ced080489cf863795fe29c5017df4ff6 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 06:04:17 -0500 Subject: [PATCH 096/155] added direct links to docs --- pytorch_lightning/trainer/trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index e89449e8864a8..27fcc7f21ddf7 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -47,6 +47,7 @@ class Trainer(TrainerIOMixin, TrainerTrainLoopMixin, TrainerCallbackConfigMixin, ): + def __init__( self, logger=True, From 42951f211276fe889b84b5679eb9495b600c2d3d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 07:25:16 -0500 Subject: [PATCH 097/155] added direct links to docs --- pytorch_lightning/trainer/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 41c69251a209c..b16cbb87f12c0 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -23,6 +23,7 @@ trainer = Trainer() trainer.fit(model) + """ from .trainer import Trainer From 8288e4ebf8227c6917d2d1c7df121f9fe177318a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 15:12:54 -0500 Subject: [PATCH 098/155] updated gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 3df09c9089f79..41641211aff94 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ __pycache__/ *$py.class timit_data/ + # C extensions *.so From 50c58c71d33a59f239e32d31c92fecdac3dc6820 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 19:44:02 -0500 Subject: [PATCH 099/155] updated docs --- docs/source/index.rst | 1 - pytorch_lightning/trainer/__init__.py | 1 - 2 files changed, 2 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 2f49871c219d0..686752891cad3 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -44,7 +44,6 @@ PyTorch-Lightning Documentation common-cases - .. toctree:: :maxdepth: 1 :name: community diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index b16cbb87f12c0..41c69251a209c 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -23,7 +23,6 @@ trainer = Trainer() trainer.fit(model) - """ from .trainer import Trainer From 2b258a6813fa2a472aa634683c082e2dfb4af97a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 20:36:57 -0500 Subject: [PATCH 100/155] finished callbacks --- docs/source/new-project.rst | 3 +++ pytorch_lightning/callbacks/pt_callbacks.py | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/source/new-project.rst b/docs/source/new-project.rst index 5c03b9e5c841c..9521febc76388 100644 --- a/docs/source/new-project.rst +++ b/docs/source/new-project.rst @@ -68,4 +68,7 @@ Then you could do rapid research by switching between these two and using the sa 1. You're writing pure PyTorch... no unnecessary abstractions or new libraries to learn. 2. You get free GPU and 16-bit support without writing any of that code in your model. +<<<<<<< HEAD 3. You also get early stopping, multi-gpu training, 16-bit and MUCH more without coding anything! +3. You also get all of the capabilities below (without coding or testing yourself). 
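As a hedged sketch of the early stopping and checkpointing capability mentioned above, assuming ``model`` is a LightningModule and relying on the documented ``EarlyStopping`` / ``ModelCheckpoint`` callbacks and Trainer arguments (the ``monitor``, ``patience`` and ``filepath`` values are only illustrative):

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint

    # stop once val_loss stops improving and keep checkpoints under some/path
    early_stop_callback = EarlyStopping(monitor='val_loss', patience=3, mode='min')
    checkpoint_callback = ModelCheckpoint(filepath='some/path', monitor='val_loss', mode='min')

    trainer = Trainer(early_stop_callback=early_stop_callback,
                      checkpoint_callback=checkpoint_callback)
    trainer.fit(model)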
+ diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index 1549f54d653e3..001981e431cbf 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ b/pytorch_lightning/callbacks/pt_callbacks.py @@ -382,7 +382,6 @@ def __init__(self, scheduling: dict): if minimal_epoch < 1: msg = f"Epochs indexing from 1, epoch {minimal_epoch} cannot be interpreted correct" raise IndexError(msg) - elif minimal_epoch != 1: # if user didnt define first epoch accumulation factor scheduling.update({1: 1}) From 16956692f0585611fa37dee1ac963a3fde833f5e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 20:50:46 -0500 Subject: [PATCH 101/155] finished callbacks --- pytorch_lightning/trainer/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 41c69251a209c..a4103d80902f5 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -1,5 +1,4 @@ """ - The trainer de-couples the engineering code (16-bit, early stopping, GPU distribution, etc...) from the science code (GAN, BERT, your project, etc...). It uses many assumptions which are best practices in AI research today. From dde99dfb7a63b344c923c9f54ee2b9fdcc48c711 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 20:57:04 -0500 Subject: [PATCH 102/155] finished callbacks --- pytorch_lightning/trainer/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index a4103d80902f5..41c69251a209c 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -1,4 +1,5 @@ """ + The trainer de-couples the engineering code (16-bit, early stopping, GPU distribution, etc...) from the science code (GAN, BERT, your project, etc...). It uses many assumptions which are best practices in AI research today. From c0148d73a4e5c05d00bb313ff73846bd261d3447 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:17:16 -0500 Subject: [PATCH 103/155] fixed left menu --- docs/source/new-project.rst | 2 -- pytorch_lightning/trainer/trainer.py | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/source/new-project.rst b/docs/source/new-project.rst index 9521febc76388..35834518355d8 100644 --- a/docs/source/new-project.rst +++ b/docs/source/new-project.rst @@ -68,7 +68,5 @@ Then you could do rapid research by switching between these two and using the sa 1. You're writing pure PyTorch... no unnecessary abstractions or new libraries to learn. 2. You get free GPU and 16-bit support without writing any of that code in your model. -<<<<<<< HEAD 3. You also get early stopping, multi-gpu training, 16-bit and MUCH more without coding anything! -3. You also get all of the capabilities below (without coding or testing yourself). diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 27fcc7f21ddf7..a9e0ffb448c6c 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1,4 +1,3 @@ -import logging import os import sys import warnings @@ -47,6 +46,8 @@ class Trainer(TrainerIOMixin, TrainerTrainLoopMixin, TrainerCallbackConfigMixin, ): + r"""Abstract base class used to build new callbacks. 
+ """ def __init__( self, From 45cbf1522357e12719261ff32b94f43fc86f9ce1 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:29:30 -0500 Subject: [PATCH 104/155] added callbacks to menu --- docs/source/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 686752891cad3..cdee132d38bf6 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -54,7 +54,6 @@ PyTorch-Lightning Documentation BECOMING_A_CORE_CONTRIBUTOR.md governance.md - Indices and tables ------------------ From fd8bd4e5ebd3161dbcd90dc9368979a9f4a0dcee Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:36:43 -0500 Subject: [PATCH 105/155] added direct links to docs --- docs/source/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/index.rst b/docs/source/index.rst index cdee132d38bf6..755bbfe5f2025 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -49,6 +49,7 @@ PyTorch-Lightning Documentation :name: community :caption: Community + CODE_OF_CONDUCT.md CONTRIBUTING.md BECOMING_A_CORE_CONTRIBUTOR.md From 124aaa00e1712398ae11aba181f629f3fda9d331 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:43:04 -0500 Subject: [PATCH 106/155] added direct links to docs --- pytorch_lightning/core/lightning.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 2e8b634d8ce14..dc4aabdc29f5e 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -5,6 +5,7 @@ from abc import ABC, abstractmethod from argparse import Namespace + import pandas as pd import torch import torch.distributed as dist From f8a84d275bad3b8b0c7a4aab9b32a1c34e617be0 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 21:49:12 -0500 Subject: [PATCH 107/155] added direct links to docs --- pytorch_lightning/core/lightning.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index dc4aabdc29f5e..4a67ba56532e5 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -19,6 +19,7 @@ class LightningModule(ABC, GradInformation, ModelIO, ModelHooks): + def __init__(self, *args, **kwargs): super(LightningModule, self).__init__(*args, **kwargs) From 792706a34587c899595a196dd9332648a905090f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Wed, 15 Jan 2020 22:01:11 -0500 Subject: [PATCH 108/155] added direct links to docs --- pytorch_lightning/callbacks/pt_callbacks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index 001981e431cbf..70da98d26b84b 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ b/pytorch_lightning/callbacks/pt_callbacks.py @@ -2,6 +2,7 @@ Callbacks ==================================== Callbacks supported by Lightning + """ import os From b3c51b89d52e65c18fc76b10177740df5453d28a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 06:04:17 -0500 Subject: [PATCH 109/155] added direct links to docs --- pytorch_lightning/trainer/__init__.py | 2 +- pytorch_lightning/trainer/trainer.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 41c69251a209c..98c2b99b56357 100644 --- a/pytorch_lightning/trainer/__init__.py +++ 
b/pytorch_lightning/trainer/__init__.py @@ -27,4 +27,4 @@ from .trainer import Trainer -__all__ = ['Trainer'] \ No newline at end of file +__all__ = ['Trainer'] diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index a9e0ffb448c6c..315d9ca9eaf0d 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -1,6 +1,7 @@ import os import sys import warnings +import logging import torch import torch.distributed as dist @@ -46,7 +47,9 @@ class Trainer(TrainerIOMixin, TrainerTrainLoopMixin, TrainerCallbackConfigMixin, ): - r"""Abstract base class used to build new callbacks. + r""" + Main trainer class + """ def __init__( @@ -417,6 +420,7 @@ def __init__( """ # # .. warning:: Following arguments become deprecated and they will be removed in v0.8.0: + # - `nb_sanity_val_steps` # Transfer params From 333cd2dd6714c05f789b52fb70a345effb20ae21 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 07:25:16 -0500 Subject: [PATCH 110/155] added direct links to docs --- pytorch_lightning/trainer/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index 98c2b99b56357..c18f22b821a3f 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -26,5 +26,4 @@ """ from .trainer import Trainer - __all__ = ['Trainer'] From acfffe1822d916158243733960de1430bca57d12 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 07:45:36 -0500 Subject: [PATCH 111/155] finished rebase --- pytorch_lightning/trainer/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index c18f22b821a3f..e250350d87e0e 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -11,6 +11,7 @@ - which optimizers to use - how to do the computations + The Trainer delegates those calls to your LightningModule which defines how to do those parts. 
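The two patches that follow rename ``init_optimizers``/``configure_schedulers`` to double-underscore names and then revert; as orientation for those hunks, here is a hedged sketch of the return shapes that logic accepts from ``configure_optimizers`` (the model and optimizer objects are hypothetical stand-ins):

.. code-block:: python

    import torch

    model = torch.nn.Linear(4, 2)                         # hypothetical model
    opt = torch.optim.Adam(model.parameters(), lr=1e-3)
    sched = torch.optim.lr_scheduler.StepLR(opt, step_size=10)

    # shapes handled by the init_optimizers logic edited in the hunks below
    single_optimizer = opt                        # bare Optimizer -> wrapped, no schedulers
    optimizers_only = [opt]                       # single list/tuple of optimizers
    optimizers_and_schedulers = ([opt], [sched])  # two lists: optimizers and lr schedulers
    # ReduceLROnPlateau instances in the second list are split out by configure_schedulers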
This is the basic use of the trainer: From 4f7af2157bfff1469abbc276dc3bc7d5e4c2e50d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 08:06:12 -0500 Subject: [PATCH 112/155] making private members --- pytorch_lightning/trainer/trainer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 315d9ca9eaf0d..cd15011f2ef09 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -707,7 +707,7 @@ def fit(self, model): # CHOOSE OPTIMIZER # allow for lr schedulers as well - self.optimizers, self.lr_schedulers = self.init_optimizers(model.configure_optimizers()) + self.optimizers, self.lr_schedulers = self.__init_optimizers(model.configure_optimizers()) self.run_pretrain_routine(model) @@ -715,7 +715,7 @@ def fit(self, model): # used for testing or when we need to know that training succeeded return 1 - def init_optimizers(self, optimizers): + def __init_optimizers(self, optimizers): # single optimizer if isinstance(optimizers, Optimizer): @@ -724,14 +724,14 @@ def init_optimizers(self, optimizers): # two lists elif len(optimizers) == 2 and isinstance(optimizers[0], list): optimizers, lr_schedulers = optimizers - lr_schedulers, self.reduce_lr_on_plateau_scheduler = self.configure_schedulers(lr_schedulers) + lr_schedulers, self.reduce_lr_on_plateau_scheduler = self.__configure_schedulers(lr_schedulers) return optimizers, lr_schedulers # single list or tuple elif isinstance(optimizers, list) or isinstance(optimizers, tuple): return optimizers, [] - def configure_schedulers(self, schedulers): + def __configure_schedulers(self, schedulers): for i, scheduler in enumerate(schedulers): if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau): reduce_lr_on_plateau_scheduler = schedulers.pop(i) From 56fb889d81c4676992c6a637d10afd7023d2d48b Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 08:11:04 -0500 Subject: [PATCH 113/155] making private members --- pytorch_lightning/trainer/trainer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index cd15011f2ef09..315d9ca9eaf0d 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -707,7 +707,7 @@ def fit(self, model): # CHOOSE OPTIMIZER # allow for lr schedulers as well - self.optimizers, self.lr_schedulers = self.__init_optimizers(model.configure_optimizers()) + self.optimizers, self.lr_schedulers = self.init_optimizers(model.configure_optimizers()) self.run_pretrain_routine(model) @@ -715,7 +715,7 @@ def fit(self, model): # used for testing or when we need to know that training succeeded return 1 - def __init_optimizers(self, optimizers): + def init_optimizers(self, optimizers): # single optimizer if isinstance(optimizers, Optimizer): @@ -724,14 +724,14 @@ def __init_optimizers(self, optimizers): # two lists elif len(optimizers) == 2 and isinstance(optimizers[0], list): optimizers, lr_schedulers = optimizers - lr_schedulers, self.reduce_lr_on_plateau_scheduler = self.__configure_schedulers(lr_schedulers) + lr_schedulers, self.reduce_lr_on_plateau_scheduler = self.configure_schedulers(lr_schedulers) return optimizers, lr_schedulers # single list or tuple elif isinstance(optimizers, list) or isinstance(optimizers, tuple): return optimizers, [] - def __configure_schedulers(self, schedulers): + def configure_schedulers(self, 
schedulers): for i, scheduler in enumerate(schedulers): if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau): reduce_lr_on_plateau_scheduler = schedulers.pop(i) From 3f7ed8aa160698e65d3786141dd997fdd70e0cb4 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 08:26:53 -0500 Subject: [PATCH 114/155] making private members --- pytorch_lightning/trainer/trainer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 315d9ca9eaf0d..5785512894d1b 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -49,9 +49,7 @@ class Trainer(TrainerIOMixin, ): r""" Main trainer class - """ - def __init__( self, logger=True, @@ -420,7 +418,6 @@ def __init__( """ # # .. warning:: Following arguments become deprecated and they will be removed in v0.8.0: - # - `nb_sanity_val_steps` # Transfer params From 3fae45c8c42feb64f0bdac63b809ede90ac0ed8d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 09:26:35 -0500 Subject: [PATCH 115/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 5785512894d1b..758de820334d5 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -3,6 +3,7 @@ import warnings import logging + import torch import torch.distributed as dist import torch.multiprocessing as mp From 4c593df2d6ac7623e01961fb35369aa25ec7f01a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 09:50:13 -0500 Subject: [PATCH 116/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 758de820334d5..2f76432fcd6ca 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -418,6 +418,7 @@ def __init__( trainer = Trainer(resume_from_checkpoint='some/path/to/my_checkpoint.ckpt') """ # + # .. warning:: Following arguments become deprecated and they will be removed in v0.8.0: # - `nb_sanity_val_steps` From fbd0f0bce150e96aa2e26bf1a25ff5656254803a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 09:55:55 -0500 Subject: [PATCH 117/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 2f76432fcd6ca..758de820334d5 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -418,7 +418,6 @@ def __init__( trainer = Trainer(resume_from_checkpoint='some/path/to/my_checkpoint.ckpt') """ # - # .. 
warning:: Following arguments become deprecated and they will be removed in v0.8.0: # - `nb_sanity_val_steps` From 44f50683fde3b64cefc8aacb9294397e5d41fa82 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:13:48 -0500 Subject: [PATCH 118/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 758de820334d5..b2c7d0dadb2b7 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -313,7 +313,6 @@ def __init__( Example:: # default used by the Trainer trainer = Trainer(row_log_interval=10) - add_row_log_interval (int): .. deprecated:: 0.5.0 Use `row_log_interval` instead. Will remove 0.8.0. From 101099f1fdd8325cbcd1cffc6f6cb93250ab0c8e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:14:29 -0500 Subject: [PATCH 119/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b2c7d0dadb2b7..743c8fb151525 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -854,6 +854,7 @@ def test(self, model=None): trainer = Trainer() trainer.test(model) + """ self.testing = True if model is not None: From 3e603572a2d284ade4c6fcac9f32319d016ec45d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:16:17 -0500 Subject: [PATCH 120/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 743c8fb151525..b2c7d0dadb2b7 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -854,7 +854,6 @@ def test(self, model=None): trainer = Trainer() trainer.test(model) - """ self.testing = True if model is not None: From 9983a28b24f22f3a4d0cd44fa2ea219168efad19 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:25:51 -0500 Subject: [PATCH 121/155] set auto dp if no backend --- pytorch_lightning/trainer/trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b2c7d0dadb2b7..743c8fb151525 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -854,6 +854,7 @@ def test(self, model=None): trainer = Trainer() trainer.test(model) + """ self.testing = True if model is not None: From 53f57e4caa04a94a70efdc84045699687daf30b2 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:32:41 -0500 Subject: [PATCH 122/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 743c8fb151525..b2c7d0dadb2b7 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -854,7 +854,6 @@ def test(self, model=None): trainer = Trainer() trainer.test(model) - """ self.testing = True if model is not None: From aa2d5d0efabfb43198682a06061e84f83824c7d3 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:40:16 -0500 Subject: [PATCH 123/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/trainer.py 
b/pytorch_lightning/trainer/trainer.py index b2c7d0dadb2b7..743c8fb151525 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -854,6 +854,7 @@ def test(self, model=None): trainer = Trainer() trainer.test(model) + """ self.testing = True if model is not None: From 309888e4035bae53fa91c04bef3bb89226845001 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:41:05 -0500 Subject: [PATCH 124/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 743c8fb151525..20db286be6f3b 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -852,9 +852,7 @@ def test(self, model=None): # run test from a loaded model model = LightningModule.load_from_checkpoint('path/to/checkpoint.ckpt') trainer = Trainer() - trainer.test(model) - """ self.testing = True if model is not None: From 4cdcdf7688b63f09860f49f22005d5a87a9fe91a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:47:12 -0500 Subject: [PATCH 125/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 20db286be6f3b..b2c7d0dadb2b7 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -852,6 +852,7 @@ def test(self, model=None): # run test from a loaded model model = LightningModule.load_from_checkpoint('path/to/checkpoint.ckpt') trainer = Trainer() + trainer.test(model) """ self.testing = True From d102f735d3d0a91d76997f8bfdafebf652a5b44d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:47:39 -0500 Subject: [PATCH 126/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index b2c7d0dadb2b7..20db286be6f3b 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -852,7 +852,6 @@ def test(self, model=None): # run test from a loaded model model = LightningModule.load_from_checkpoint('path/to/checkpoint.ckpt') trainer = Trainer() - trainer.test(model) """ self.testing = True From 155fb07848c08b8adc120ad72c61de6cd64b3511 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 10:51:15 -0500 Subject: [PATCH 127/155] working on trainer docs --- pytorch_lightning/trainer/trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 20db286be6f3b..ca380b122618a 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -395,6 +395,7 @@ def __init__( .. deprecated:: 0.5.0 Use `num_sanity_val_steps` instead. Will remove 0.8.0. + truncated_bptt_steps (int): Truncated back prop breaks performs backprop every k steps of a much longer sequence If this is enabled, your batches will automatically get truncated and the trainer will apply Truncated Backprop to it. 
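A hedged sketch of turning that flag on; ``truncated_bptt_steps`` is documented in the argument list above, and the comment only restates the docstring's own requirement about batch layout:

.. code-block:: python

    from pytorch_lightning import Trainer

    # backprop every 2 steps of a longer sequence; batches are assumed to carry a
    # sequence/time dimension as the docstring requires -- the actual splitting is
    # done by the trainer internals and is not reproduced here
    trainer = Trainer(truncated_bptt_steps=2)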
Make sure your batches have a sequence From 8649ae65b82b2c324f98b57623295b3ab083da1a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 13:36:48 -0500 Subject: [PATCH 128/155] working on trainer docs --- pytorch_lightning/core/lightning.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 4a67ba56532e5..ad68416682d63 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -9,12 +9,13 @@ import pandas as pd import torch import torch.distributed as dist - +# from pytorch_lightning.core.decorators import data_loader from pytorch_lightning.core.grads import GradInformation from pytorch_lightning.core.hooks import ModelHooks -from pytorch_lightning.core.memory import ModelSummary from pytorch_lightning.core.saving import ModelIO +from pytorch_lightning.core.memory import ModelSummary +from pytorch_lightning.trainer.training_io import load_hparams_from_tags_csv from pytorch_lightning.overrides.data_parallel import LightningDistributedDataParallel From b7e861d93d9cc78d1a7038ad299e1a103d6d74ee Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 14:51:54 -0500 Subject: [PATCH 129/155] fixed lightning import --- pytorch_lightning/core/lightning.py | 1 - pytorch_lightning/trainer/training_io.py | 1 - 2 files changed, 2 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index ad68416682d63..a9a70f776f6fc 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -15,7 +15,6 @@ from pytorch_lightning.core.hooks import ModelHooks from pytorch_lightning.core.saving import ModelIO from pytorch_lightning.core.memory import ModelSummary -from pytorch_lightning.trainer.training_io import load_hparams_from_tags_csv from pytorch_lightning.overrides.data_parallel import LightningDistributedDataParallel diff --git a/pytorch_lightning/trainer/training_io.py b/pytorch_lightning/trainer/training_io.py index 6ea819ba1691c..3c489132c739c 100644 --- a/pytorch_lightning/trainer/training_io.py +++ b/pytorch_lightning/trainer/training_io.py @@ -98,7 +98,6 @@ from subprocess import call from argparse import Namespace -import pandas as pd import torch import torch.distributed as dist From a01f685672f8495a3bbbb1bc193b370e57b1e5ac Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 15:11:17 -0500 Subject: [PATCH 130/155] cleared spaces --- pytorch_lightning/core/lightning.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index a9a70f776f6fc..841a8bb2b608b 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -19,7 +19,6 @@ class LightningModule(ABC, GradInformation, ModelIO, ModelHooks): - def __init__(self, *args, **kwargs): super(LightningModule, self).__init__(*args, **kwargs) From ef3fe538014ef273e6d31c0c5709ae5833be6a5e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 15:36:40 -0500 Subject: [PATCH 131/155] cleared spaces --- pytorch_lightning/core/lightning.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 841a8bb2b608b..93689ac2a3a7a 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -680,7 +680,6 @@ def configure_apex(self, amp, model, optimizers, amp_level): @abstractmethod def 
configure_optimizers(self): r""" - This is where you choose what optimizers and learning-rate schedulers to use in your optimization. Normally you'd need one. But in the case of GANs or something more esoteric you might have multiple. From 22bd7dc9209a9eae269209684438b0e57c4719fa Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:08:03 -0500 Subject: [PATCH 132/155] cleared spaces --- pytorch_lightning/core/lightning.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 93689ac2a3a7a..20c62cf82ba02 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1015,7 +1015,6 @@ def load_from_metrics(cls, weights_path, tags_csv, map_location=None): map_location (dict): A dictionary mapping saved weight GPU devices to new GPU devices (example: {'cuda:1':'cuda:0'}) - Return: LightningModule with loaded weights @@ -1136,7 +1135,6 @@ def summarize(self, mode): def freeze(self): r""" - Freeze all params for inference Example From 95df5e73d940571a7f25330c3e00764617689835 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:20:03 -0500 Subject: [PATCH 133/155] cleared spaces --- pytorch_lightning/core/lightning.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 20c62cf82ba02..23757110c4aa8 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1166,7 +1166,6 @@ def unfreeze(self): def on_load_checkpoint(self, checkpoint): r""" - Called by lightning to restore your model. If you saved something with **on_save_checkpoint** this is your chance to restore this. From 5e38ab259e4cbd8ccbe7860db40f8c6c21e64483 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:23:08 -0500 Subject: [PATCH 134/155] cleared spaces --- pytorch_lightning/core/lightning.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 23757110c4aa8..2ae7fd32e3483 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1234,6 +1234,7 @@ def convert(val): return False for c in constructors: + try: return c(val) except ValueError: From 8258d9c1c2d5865697f763c2b2daa4c1d4389df9 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:43:44 -0500 Subject: [PATCH 135/155] finished lightning module --- pytorch_lightning/core/lightning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py index 2ae7fd32e3483..afc87021c511b 100644 --- a/pytorch_lightning/core/lightning.py +++ b/pytorch_lightning/core/lightning.py @@ -1228,13 +1228,13 @@ def convert(val): constructors = [int, float, str] if type(val) is str: + if val.lower() == 'true': return True if val.lower() == 'false': return False for c in constructors: - try: return c(val) except ValueError: From 150a94543c2ac346bd220bfc2b5c2dcc8e5b110d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:50:05 -0500 Subject: [PATCH 136/155] finished lightning module --- pytorch_lightning/callbacks/pt_callbacks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index 70da98d26b84b..71e61f9bfb2be 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ 
b/pytorch_lightning/callbacks/pt_callbacks.py @@ -361,6 +361,7 @@ class GradientAccumulationScheduler(Callback): Args: scheduling (dict): scheduling in format {epoch: accumulation_factor} + Example:: from pytorch_lightning import Trainer From b3514960015162ae8d5c776d12fabe044a298661 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:50:54 -0500 Subject: [PATCH 137/155] finished lightning module --- pytorch_lightning/callbacks/pt_callbacks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index 71e61f9bfb2be..065901c98c587 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ b/pytorch_lightning/callbacks/pt_callbacks.py @@ -95,6 +95,7 @@ class EarlyStopping(Callback): early_stopping = EarlyStopping('val_loss') Trainer(early_stop_callback=early_stopping) + """ def __init__(self, monitor='val_loss', From d8e112d4e2818b57587062dded3dbd430a4841d0 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 16:51:23 -0500 Subject: [PATCH 138/155] finished lightning module --- pytorch_lightning/callbacks/pt_callbacks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index 065901c98c587..57b34f0343688 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ b/pytorch_lightning/callbacks/pt_callbacks.py @@ -362,7 +362,6 @@ class GradientAccumulationScheduler(Callback): Args: scheduling (dict): scheduling in format {epoch: accumulation_factor} - Example:: from pytorch_lightning import Trainer From db72ece7bbc89ed4b96d0930ce96ff47a4218222 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:02:25 -0500 Subject: [PATCH 139/155] added callbacks --- pytorch_lightning/logging/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/logging/__init__.py b/pytorch_lightning/logging/__init__.py index 5fbb93cddc14d..2697e4e652aa8 100644 --- a/pytorch_lightning/logging/__init__.py +++ b/pytorch_lightning/logging/__init__.py @@ -71,7 +71,6 @@ def any_lightning_module_function_or_hook(...): Supported Loggers ----------------- """ - from os import environ from .base import LightningLoggerBase, rank_zero_only From cd1429b58779e88e1967d163c2b8d67609cc4e37 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:06:32 -0500 Subject: [PATCH 140/155] added loggers --- pytorch_lightning/logging/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/logging/__init__.py b/pytorch_lightning/logging/__init__.py index 2697e4e652aa8..1ccabd20dbfd1 100644 --- a/pytorch_lightning/logging/__init__.py +++ b/pytorch_lightning/logging/__init__.py @@ -113,4 +113,5 @@ def any_lightning_module_function_or_hook(...): except ImportError: pass + __all__ = all From 588fd516a1aeea77c41d208a0d254a736ade80ff Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:19:07 -0500 Subject: [PATCH 141/155] added loggers --- pytorch_lightning/logging/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pytorch_lightning/logging/__init__.py b/pytorch_lightning/logging/__init__.py index 1ccabd20dbfd1..b112746e21b9b 100644 --- a/pytorch_lightning/logging/__init__.py +++ b/pytorch_lightning/logging/__init__.py @@ -76,7 +76,7 @@ def any_lightning_module_function_or_hook(...): from .base import LightningLoggerBase, rank_zero_only from .tensorboard import TensorBoardLogger -all = [] +all = ['TensorBoardLogger'] 
try: # needed to prevent ImportError and duplicated logs. @@ -113,5 +113,4 @@ def any_lightning_module_function_or_hook(...): except ImportError: pass - __all__ = all From 94943afcb87a0ebf3838f7f9cb4a09ab1a8a251a Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:19:40 -0500 Subject: [PATCH 142/155] added loggers --- pytorch_lightning/logging/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/logging/__init__.py b/pytorch_lightning/logging/__init__.py index b112746e21b9b..2697e4e652aa8 100644 --- a/pytorch_lightning/logging/__init__.py +++ b/pytorch_lightning/logging/__init__.py @@ -76,7 +76,7 @@ def any_lightning_module_function_or_hook(...): from .base import LightningLoggerBase, rank_zero_only from .tensorboard import TensorBoardLogger -all = ['TensorBoardLogger'] +all = [] try: # needed to prevent ImportError and duplicated logs. From aae963a186853f8e6f3cb312016c5c301cfb7222 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:24:03 -0500 Subject: [PATCH 143/155] added loggers --- pytorch_lightning/logging/mlflow.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/logging/mlflow.py b/pytorch_lightning/logging/mlflow.py index 50f4843e0f6c9..bf04917c8391e 100644 --- a/pytorch_lightning/logging/mlflow.py +++ b/pytorch_lightning/logging/mlflow.py @@ -47,6 +47,7 @@ def __init__(self, experiment_name, tracking_uri=None, tags=None): experiment_name (str): The name of the experiment tracking_uri (str): where this should track tags (dict): todo this param + """ super().__init__() self._mlflow_client = mlflow.tracking.MlflowClient(tracking_uri) From 37b0cd5bd22dae4300d3f4de9210cd10a1eb9d25 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:24:41 -0500 Subject: [PATCH 144/155] added loggers --- pytorch_lightning/logging/mlflow.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/logging/mlflow.py b/pytorch_lightning/logging/mlflow.py index bf04917c8391e..50f4843e0f6c9 100644 --- a/pytorch_lightning/logging/mlflow.py +++ b/pytorch_lightning/logging/mlflow.py @@ -47,7 +47,6 @@ def __init__(self, experiment_name, tracking_uri=None, tags=None): experiment_name (str): The name of the experiment tracking_uri (str): where this should track tags (dict): todo this param - """ super().__init__() self._mlflow_client = mlflow.tracking.MlflowClient(tracking_uri) From 44d844022fa8db068985744e4266946216d4dce5 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:29:13 -0500 Subject: [PATCH 145/155] added loggers --- pytorch_lightning/logging/neptune.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pytorch_lightning/logging/neptune.py b/pytorch_lightning/logging/neptune.py index 7c677962df70e..4b179878bcab2 100644 --- a/pytorch_lightning/logging/neptune.py +++ b/pytorch_lightning/logging/neptune.py @@ -92,6 +92,7 @@ def __init__(self, api_key=None, project_name=None, offline_mode=False, ) trainer = Trainer(max_epochs=10, logger=neptune_logger) + Args: api_key (str | None): Required in online mode. Neputne API token, found on https://neptune.ml. 
Read how to get your API key From 20efe6cf3a90f902a262215137b57104927bdb03 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:35:02 -0500 Subject: [PATCH 146/155] added loggers --- pytorch_lightning/logging/neptune.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch_lightning/logging/neptune.py b/pytorch_lightning/logging/neptune.py index 4b179878bcab2..7c677962df70e 100644 --- a/pytorch_lightning/logging/neptune.py +++ b/pytorch_lightning/logging/neptune.py @@ -92,7 +92,6 @@ def __init__(self, api_key=None, project_name=None, offline_mode=False, ) trainer = Trainer(max_epochs=10, logger=neptune_logger) - Args: api_key (str | None): Required in online mode. Neputne API token, found on https://neptune.ml. Read how to get your API key From e9498b22416582b535bf770bcb937d2a1c6d2c80 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:39:09 -0500 Subject: [PATCH 147/155] added loggers --- docs/source/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/index.rst b/docs/source/index.rst index 755bbfe5f2025..b4cf5e6d26419 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -55,6 +55,7 @@ PyTorch-Lightning Documentation BECOMING_A_CORE_CONTRIBUTOR.md governance.md + Indices and tables ------------------ From 7c048d3cf5b0154e98062baea8aaab665e4e9cd7 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:41:11 -0500 Subject: [PATCH 148/155] added loggers --- docs/source/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index b4cf5e6d26419..755bbfe5f2025 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -55,7 +55,6 @@ PyTorch-Lightning Documentation BECOMING_A_CORE_CONTRIBUTOR.md governance.md - Indices and tables ------------------ From 23b0a746b34a6793ce5e9463e11257c3f2a5aa5d Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:42:12 -0500 Subject: [PATCH 149/155] added loggers --- docs/source/examples.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/examples.rst b/docs/source/examples.rst index a734243d5d41b..93d69c81c9bf3 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -32,3 +32,4 @@ Imagenet :maxdepth: 3 pl_examples.full_examples.imagenet.imagenet_example + From a962c03f319f19dcd75741deb2b2a91f4526a0e7 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Thu, 16 Jan 2020 17:42:24 -0500 Subject: [PATCH 150/155] added loggers --- docs/source/examples.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/examples.rst b/docs/source/examples.rst index 93d69c81c9bf3..a734243d5d41b 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -32,4 +32,3 @@ Imagenet :maxdepth: 3 pl_examples.full_examples.imagenet.imagenet_example - From 8ad6623ab617b303b34d5aecf0ca28f7a089282f Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 17 Jan 2020 05:03:31 -0500 Subject: [PATCH 151/155] flake 8 --- pytorch_lightning/trainer/__init__.py | 1 - pytorch_lightning/trainer/trainer.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pytorch_lightning/trainer/__init__.py b/pytorch_lightning/trainer/__init__.py index e250350d87e0e..c18f22b821a3f 100644 --- a/pytorch_lightning/trainer/__init__.py +++ b/pytorch_lightning/trainer/__init__.py @@ -11,7 +11,6 @@ - which optimizers to use - how to do the computations - The Trainer delegates those calls to your LightningModule which defines how to do those parts. 
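Returning to the Neptune logger touched a few patches above, a hedged usage sketch: ``api_key``, ``project_name``, ``offline_mode``, ``max_epochs`` and the ``logger`` argument all appear in the docstrings being edited, while the project name below is invented and running this still requires the neptune client to be installed:

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.logging.neptune import NeptuneLogger  # module path from the patched file

    # offline_mode avoids needing a real https://neptune.ml API token for this sketch
    neptune_logger = NeptuneLogger(
        offline_mode=True,
        project_name="my_workspace/my_project",  # hypothetical project name
    )

    trainer = Trainer(max_epochs=10, logger=neptune_logger)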
This is the basic use of the trainer: diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index ca380b122618a..68451f0022a9f 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -313,6 +313,7 @@ def __init__( Example:: # default used by the Trainer trainer = Trainer(row_log_interval=10) + add_row_log_interval (int): .. deprecated:: 0.5.0 Use `row_log_interval` instead. Will remove 0.8.0. @@ -395,7 +396,6 @@ def __init__( .. deprecated:: 0.5.0 Use `num_sanity_val_steps` instead. Will remove 0.8.0. - truncated_bptt_steps (int): Truncated back prop breaks performs backprop every k steps of a much longer sequence If this is enabled, your batches will automatically get truncated and the trainer will apply Truncated Backprop to it. Make sure your batches have a sequence From 3bb91bad9992350b3ed9b1f1ca8c1fe5d84d5be6 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Fri, 17 Jan 2020 05:31:16 -0500 Subject: [PATCH 152/155] flake 8 --- docs/source/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/index.rst b/docs/source/index.rst index 755bbfe5f2025..f22d45fe6ccff 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -3,6 +3,7 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. + PyTorch-Lightning Documentation ============================= From a53e6aa67bc204ef42af335c6fc57313531882f2 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 21 Jan 2020 13:20:55 -0500 Subject: [PATCH 153/155] fix docs path --- docs/source/callbacks.rst | 14 ++++++++++++++ docs/source/common-cases.rst | 21 +++++++++++++++++++++ docs/source/lightning-module.rst | 10 ++++++++++ docs/source/logging.rst | 12 ++++++++++++ docs/source/modules.rst | 7 +++++++ docs/source/trainer.rst | 21 +++++++++++++++++++++ docs/source/tutorials.rst | 20 ++++++++++++++++++++ 7 files changed, 105 insertions(+) create mode 100644 docs/source/callbacks.rst create mode 100644 docs/source/common-cases.rst create mode 100644 docs/source/lightning-module.rst create mode 100644 docs/source/logging.rst create mode 100644 docs/source/modules.rst create mode 100644 docs/source/trainer.rst create mode 100644 docs/source/tutorials.rst diff --git a/docs/source/callbacks.rst b/docs/source/callbacks.rst new file mode 100644 index 0000000000000..ae8dd25f1f93b --- /dev/null +++ b/docs/source/callbacks.rst @@ -0,0 +1,14 @@ +.. role:: hidden + :class: hidden-section + +Callbacks +=========== +.. automodule:: pytorch_lightning.callbacks + :exclude-members: + _del_model, + _save_model, + on_epoch_end, + on_train_end, + on_epoch_begin, + check_monitor_top_k, + on_train_begin, \ No newline at end of file diff --git a/docs/source/common-cases.rst b/docs/source/common-cases.rst new file mode 100644 index 0000000000000..7b96a93d84660 --- /dev/null +++ b/docs/source/common-cases.rst @@ -0,0 +1,21 @@ +Multi-gpu (same node) training +============================== + +Multi-node training +==================== + +16-bit precision +================= + +gradient clipping +================= + +modifying training via hooks +============================= + + + +.. toctree:: + :maxdepth: 3 + + pl_examples \ No newline at end of file diff --git a/docs/source/lightning-module.rst b/docs/source/lightning-module.rst new file mode 100644 index 0000000000000..93c81537f65ca --- /dev/null +++ b/docs/source/lightning-module.rst @@ -0,0 +1,10 @@ +.. 
role:: hidden + :class: hidden-section + +LightningModule +=========== +.. automodule:: pytorch_lightning.core + :exclude-members: + _abc_impl, + summarize, + diff --git a/docs/source/logging.rst b/docs/source/logging.rst new file mode 100644 index 0000000000000..24f49f0ab15c0 --- /dev/null +++ b/docs/source/logging.rst @@ -0,0 +1,12 @@ +.. role:: hidden + :class: hidden-section + +Logging +=========== +.. automodule:: pytorch_lightning.logging + :exclude-members: + _abc_impl, + _save_model, + on_epoch_end, + on_train_end, + on_epoch_begin, diff --git a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 0000000000000..e4c5121858c28 --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,7 @@ +pl_examples +=========== + +.. toctree:: + :maxdepth: 4 + + pl_examples diff --git a/docs/source/trainer.rst b/docs/source/trainer.rst new file mode 100644 index 0000000000000..db2657dc2c429 --- /dev/null +++ b/docs/source/trainer.rst @@ -0,0 +1,21 @@ +.. role:: hidden + :class: hidden-section + +Trainer +=========== +.. automodule:: pytorch_lightning.trainer + :members: fit, test + :exclude-members: + run_pretrain_routine, + _abc_impl, + _Trainer__set_root_gpu, + _Trainer__init_optimizers, + _Trainer__parse_gpu_ids, + _Trainer__configure_schedulers, + data_parallel, + num_gpus, + slurm_job_id, + tng_tqdm_dic, + training_tqdm_dict, + init_optimizers, + configure_schedulers diff --git a/docs/source/tutorials.rst b/docs/source/tutorials.rst new file mode 100644 index 0000000000000..7b66d141ff2f9 --- /dev/null +++ b/docs/source/tutorials.rst @@ -0,0 +1,20 @@ +Refactoring PyTorch into Lightning +================================== +`Tutorial `_ + +Start a research project +========================= +`Research seed `_ + +Basic Lightning use +==================== +`Tutorial `_ + +9 key Lightning tricks +======================== +`Tutorial <9 key speed features in Pytorch-Lightning>`_ + +Multi-node training on SLURM +============================= +`Tutorial `_ + From 8e5e227152d0360df5e1807f709d5266f81565fb Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 21 Jan 2020 14:16:55 -0500 Subject: [PATCH 154/155] flake 8 --- pytorch_lightning/callbacks/pt_callbacks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index 001981e431cbf..4c7d877a85bd6 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ b/pytorch_lightning/callbacks/pt_callbacks.py @@ -403,4 +403,4 @@ def on_epoch_begin(self, epoch, trainer): # should_stop = c.on_epoch_end(i, logs={'val_loss': loss}) # logging.info(loss) # if should_stop: -# break \ No newline at end of file +# break From bc4cd3d69adfd56573a877897e0133f67f013d9e Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 21 Jan 2020 14:18:43 -0500 Subject: [PATCH 155/155] Update theme_variables.jinja --- docs/source/_templates/theme_variables.jinja | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/source/_templates/theme_variables.jinja b/docs/source/_templates/theme_variables.jinja index 3a67ad64d384d..b61b79d698bf2 100644 --- a/docs/source/_templates/theme_variables.jinja +++ b/docs/source/_templates/theme_variables.jinja @@ -2,16 +2,16 @@ 'github': 'https://github.com/PytorchLightning/pytorch-lightning', 'github_issues': 'https://github.com/PytorchLightning/pytorch-lightning/issues', 'contributing': 'https://github.com/PytorchLightning/pytorch-lightning/blob/master/CONTRIBUTING.md', - 'docs': 
'https://pytorchlightning.github.io/pytorch-lightning', + 'docs': 'https://pytorch-lightning.rtfd.io/en/latest', 'twitter': 'https://twitter.com/PyTorchLightnin', 'discuss': 'https://discuss.pytorch.org', - 'tutorials': 'https://pytorchlightning.github.io/pytorch-lightning/', - 'previous_pytorch_versions': 'https://pytorchlightning.github.io/pytorch-lightning/', - 'home': 'https://pytorchlightning.github.io/pytorch-lightning/', - 'get_started': 'https://pytorchlightning.github.io/pytorch-lightning/', - 'features': 'https://pytorchlightning.github.io/pytorch-lightning/', - 'blog': 'https://pytorchlightning.github.io/pytorch-lightning/', - 'resources': 'https://pytorchlightning.github.io/pytorch-lightning/', - 'support': 'https://pytorchlightning.github.io/pytorch-lightning/', + 'tutorials': 'https://pytorch-lightning.rtfd.io/en/latest/', + 'previous_pytorch_versions': 'https://pytorch-lightning.rtfd.io/en/latest/', + 'home': 'https://pytorch-lightning.rtfd.io/en/latest/', + 'get_started': 'https://pytorch-lightning.rtfd.io/en/latest/', + 'features': 'https://pytorch-lightning.rtfd.io/en/latest/', + 'blog': 'https://pytorch-lightning.rtfd.io/en/latest/', + 'resources': 'https://pytorch-lightning.rtfd.io/en/latest/', + 'support': 'https://pytorch-lightning.rtfd.io/en/latest/', } -%}
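One detail worth spelling out from the new ``docs/source/trainer.rst`` above: its exclude-members list carries both ``init_optimizers`` and ``_Trainer__init_optimizers`` because the earlier "making private members" patches toggled double-underscore names, and Python mangles those to ``_ClassName__name``. A small self-contained sketch of that mangling (the ``Demo`` class is hypothetical):

.. code-block:: python

    class Demo:
        def __init_optimizers(self):   # leading double underscore triggers name mangling
            return "configured"

    d = Demo()
    # the plain name does not exist on the instance...
    print(hasattr(d, "__init_optimizers"))   # False
    # ...because Python stored it as _Demo__init_optimizers, which is why the
    # exclude list above needs the _Trainer__init_optimizers spelling as well
    print(d._Demo__init_optimizers())        # configured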