diff --git a/README.md b/README.md
index dba983b2f9b14..f985990eaa3b8 100644
--- a/README.md
+++ b/README.md
@@ -84,12 +84,12 @@ Lightning sets up all the boilerplate state-of-the-art training for you so you c

 ---
 ## How do I do use it?
-Think about Lightning as refactoring your research code instead of using a new framework. The research code goes into a [LightningModule](https://pytorch-lightning.rtfd.io/en/latest/LightningModule/RequiredTrainerInterface/) which you fit using a Trainer.
+Think about Lightning as refactoring your research code instead of using a new framework. The research code goes into a [LightningModule](https://pytorch-lightning.rtfd.io/en/latest/lightning-module.html) which you fit using a Trainer.

 The LightningModule defines a *system* such as seq-2-seq, GAN, etc... It can ALSO define a simple classifier such as the example below.

 To use lightning do 2 things:
-1. [Define a LightningModule](https://pytorch-lightning.rtfd.io/en/latest/LightningModule/RequiredTrainerInterface/)
+1. [Define a LightningModule](https://pytorch-lightning.rtfd.io/en/latest/lightning-module.html)
 **WARNING:** This syntax is for version 0.5.0+ where abbreviations were removed.
 ```python
 import os
@@ -165,7 +165,7 @@ To use lightning do 2 things:
         # OPTIONAL
         return DataLoader(MNIST(os.getcwd(), train=False, download=True, transform=transforms.ToTensor()), batch_size=32)
 ```
-2. Fit with a [trainer](https://pytorch-lightning.rtfd.io/en/latest/Trainer/)
+2. Fit with a [trainer](https://pytorch-lightning.rtfd.io/en/latest/pytorch_lightning.trainer.html)

 ```python
 from pytorch_lightning import Trainer
diff --git a/docs/source/_static/images/lightning_logo-name.svg b/docs/source/_static/images/lightning_logo-name.svg
index 37cdba22a3fce..d684eb0fdcaad 100755
--- a/docs/source/_static/images/lightning_logo-name.svg
+++ b/docs/source/_static/images/lightning_logo-name.svg
@@ -1,12 +1,80 @@
[SVG markup diff omitted: extraction stripped the tags, leaving only bare "+"/"-" markers. Recoverable content: the 12-line Sketch export of the logo (text nodes "long", "Created with Sketch.", "PyTorch Lightning", no newline at end of file) is replaced by an 80-line export carrying the same text nodes plus image/svg+xml metadata.]
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 5183ef6b933ed..a5d7841aa649c 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -318,7 +318,7 @@ def find_source():
             obj = getattr(obj, part)
         fname = inspect.getsourcefile(obj)
         # https://github.com/rtfd/readthedocs.org/issues/5735
-        if any([s in fname for s in ('readthedocs', 'checkouts')]):
+        if any([s in fname for s in ('readthedocs', 'rtfd', 'checkouts')]):
             # /home/docs/checkouts/readthedocs.org/user_builds/pytorch_lightning/checkouts/
             #  devel/pytorch_lightning/utilities/cls_experiment.py#L26-L176
             path_top = os.path.abspath(os.path.join('..', '..', '..'))
@@ -338,6 +338,10 @@ def find_source():
     # import subprocess
     # tag = subprocess.Popen(['git', 'rev-parse', 'HEAD'], stdout=subprocess.PIPE,
     #                        universal_newlines=True).communicate()[0][:-1]
+    branch = filename.split('/')[0]
+    # do mapping from latest tags to master
+    branch = {'latest': 'master', 'stable': 'master'}.get(branch, branch)
+    filename = '/'.join([branch] + filename.split('/')[1:])
     return "https://github.com/%s/%s/blob/%s" \
            % (github_user, github_repo, filename)
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
index a734243d5d41b..c83c3fff93e14 100644
--- a/docs/source/examples.rst
+++ b/docs/source/examples.rst
@@ -1,33 +1,33 @@
 GAN
-====
+===
 .. toctree::
    :maxdepth: 3

    pl_examples.domain_templates.gan

 MNIST
-====
+=====
 ..
toctree:: :maxdepth: 3 pl_examples.basic_examples.lightning_module_template Multi-node (ddp) MNIST -==== +====================== .. toctree:: :maxdepth: 3 pl_examples.multi_node_examples.multi_node_ddp_demo Multi-node (ddp2) MNIST -==== +======================= .. toctree:: :maxdepth: 3 pl_examples.multi_node_examples.multi_node_ddp2_demo Imagenet -==== +======== .. toctree:: :maxdepth: 3 diff --git a/pytorch_lightning/callbacks/pt_callbacks.py b/pytorch_lightning/callbacks/pt_callbacks.py index 20d035679fa7e..125e15e1e42c8 100644 --- a/pytorch_lightning/callbacks/pt_callbacks.py +++ b/pytorch_lightning/callbacks/pt_callbacks.py @@ -1,6 +1,7 @@ """ Callbacks -==================================== +========= + Callbacks supported by Lightning """ diff --git a/pytorch_lightning/core/__init__.py b/pytorch_lightning/core/__init__.py index 17d7619de3663..b338a8b4f7d9f 100644 --- a/pytorch_lightning/core/__init__.py +++ b/pytorch_lightning/core/__init__.py @@ -85,6 +85,7 @@ def test_dataloader(self): Once you've defined the LightningModule, fit it using a trainer. .. code-block:: python + trainer = pl.Trainer() model = CoolModel() diff --git a/pytorch_lightning/logging/__init__.py b/pytorch_lightning/logging/__init__.py index e9fe3eefb6bea..4850494f338bc 100644 --- a/pytorch_lightning/logging/__init__.py +++ b/pytorch_lightning/logging/__init__.py @@ -3,6 +3,7 @@ To use a logger, simply pass it into the trainer. .. code-block:: python + from pytorch_lightning import logging # lightning uses tensorboard by default diff --git a/pytorch_lightning/logging/test_tube.py b/pytorch_lightning/logging/test_tube.py index c1ebc8cad8ff4..10e14a502a37b 100644 --- a/pytorch_lightning/logging/test_tube.py +++ b/pytorch_lightning/logging/test_tube.py @@ -46,7 +46,7 @@ class TestTubeLogger(LightningLoggerBase): `os.path.join(save_dir, name, version)` Example - -------- + ------- .. code-block:: python diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index c2add79416021..0e31d5b2ff3fc 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -96,6 +96,7 @@ def __init__( Args: logger (:class:`.Logger`): Logger for experiment tracking. Example:: + from pytorch_lightning.logging import TensorBoardLogger # default logger used by trainer @@ -106,8 +107,10 @@ def __init__( ) Trainer(logger=logger) + checkpoint_callback (:class:`CheckpointCallback`): Callback for checkpointing. Example:: + from pytorch_lightning.callbacks import ModelCheckpoint # default used by the Trainer @@ -121,6 +124,7 @@ def __init__( ) trainer = Trainer(checkpoint_callback=checkpoint_callback) + early_stop_callback (:class:`.EarlyStopping`): Callback for early stopping. If set to ``True``, then the default callback monitoring ``'val_loss'`` is created. Will raise an error if ``'val_loss'`` is not found. @@ -129,6 +133,7 @@ def __init__( If ``'val_loss'`` is not found will work as if early stopping is disabled. Default: ``None``. Example:: + from pytorch_lightning.callbacks import EarlyStopping # default used by the Trainer @@ -141,25 +146,32 @@ def __init__( ) trainer = Trainer(early_stop_callback=early_stop_callback) + default_save_path (str): Default path for logs and weights when no logger/ckpt_callback passed Example:: + # default used by the Trainer trainer = Trainer(default_save_path=os.getcwd()) + gradient_clip_val (float): 0 means don't clip. Example:: + # default used by the Trainer trainer = Trainer(gradient_clip_val=0.0) + gradient_clip (int): .. 
deprecated:: 0.5.0 Use `gradient_clip_val` instead. Will remove 0.8.0. process_position (int): orders the tqdm bar when running multiple models on same machine. Example:: + # default used by the Trainer trainer = Trainer(process_position=0) num_nodes (int): number of GPU nodes for distributed training. Example:: + # default used by the Trainer trainer = Trainer(num_nodes=1) @@ -172,6 +184,7 @@ def __init__( gpus (list|str|int): Which GPUs to train on. Example:: + # default used by the Trainer (ie: train on CPU) trainer = Trainer(gpus=None) @@ -192,6 +205,7 @@ def __init__( log_gpu_memory (str): None, 'min_max', 'all'. Might slow performance because it uses the output of nvidia-smi. Example:: + # default used by the Trainer trainer = Trainer(log_gpu_memory=None) @@ -203,11 +217,13 @@ def __init__( show_progress_bar (bool): If true shows tqdm progress bar Example:: + # default used by the Trainer trainer = Trainer(show_progress_bar=True) overfit_pct (float): uses this much data of all datasets. Example:: + # default used by the Trainer trainer = Trainer(overfit_pct=0.0) @@ -216,14 +232,16 @@ def __init__( track_grad_norm (int): -1 no tracking. Otherwise tracks that norm Example:: + # default used by the Trainer trainer = Trainer(track_grad_norm=-1) # track the 2-norm trainer = Trainer(track_grad_norm=2) - check_val_every_n_epoch (int): check val every n train epochs + check_val_every_n_epoch (int): Check val every n train epochs. Example:: + # default used by the Trainer trainer = Trainer(check_val_every_n_epoch=1) @@ -232,6 +250,7 @@ def __init__( fast_dev_run (bool): runs 1 batch of train, test and val to find any bugs (ie: a sort of unit test). Example:: + # default used by the Trainer trainer = Trainer(fast_dev_run=False) @@ -240,6 +259,7 @@ def __init__( accumulate_grad_batches (int|dict): Accumulates grads every k batches or as set up in the dict. Example:: + # default used by the Trainer (no accumulation) trainer = Trainer(accumulate_grad_batches=1) @@ -249,8 +269,9 @@ def __init__( # no accumulation for epochs 1-4. accumulate 3 for epochs 5-10. accumulate 20 after that trainer = Trainer(accumulate_grad_batches={5: 3, 10: 20}) - max_epochs (int): Stop training once this number of epochs is reached + max_epochs (int): Stop training once this number of epochs is reached. Example:: + # default used by the Trainer trainer = Trainer(max_epochs=1000) @@ -260,6 +281,7 @@ def __init__( min_epochs (int): Force training for at least these many epochs Example:: + # default used by the Trainer trainer = Trainer(min_epochs=1) @@ -270,6 +292,7 @@ def __init__( train_percent_check (int): How much of training dataset to check. Useful when debugging or testing something that happens at the end of an epoch. Example:: + # default used by the Trainer trainer = Trainer(train_percent_check=1.0) @@ -279,6 +302,7 @@ def __init__( val_percent_check (int): How much of validation dataset to check. Useful when debugging or testing something that happens at the end of an epoch. Example:: + # default used by the Trainer trainer = Trainer(val_percent_check=1.0) @@ -288,6 +312,7 @@ def __init__( test_percent_check (int): How much of test dataset to check. Useful when debugging or testing something that happens at the end of an epoch. Example:: + # default used by the Trainer trainer = Trainer(test_percent_check=1.0) @@ -297,6 +322,7 @@ def __init__( val_check_interval (float|int): How often within one training epoch to check the validation set If float, % of tng epoch. 
If int, check every n batch Example:: + # default used by the Trainer trainer = Trainer(val_check_interval=1.0) @@ -310,11 +336,13 @@ def __init__( log_save_interval (int): Writes logs to disk this often Example:: + # default used by the Trainer trainer = Trainer(log_save_interval=100) row_log_interval (int): How often to add logging rows (does not write to disk) Example:: + # default used by the Trainer trainer = Trainer(row_log_interval=10) @@ -325,6 +353,7 @@ def __init__( distributed_backend (str): The distributed backend to use. Options: 'dp', 'ddp', 'ddp2'. Example:: + # default used by the Trainer trainer = Trainer(distributed_backend=None) @@ -344,17 +373,20 @@ def __init__( use_amp (bool): If true uses apex for 16bit precision Example:: + # default used by the Trainer trainer = Trainer(use_amp=False) print_nan_grads (bool): Prints gradients with nan values Example:: + # default used by the Trainer trainer = Trainer(print_nan_grads=False) weights_summary (str): Prints a summary of the weights when training begins. Options: 'full', 'top', None. Example:: + # default used by the Trainer (ie: print all weights) trainer = Trainer(weights_summary='full') @@ -366,6 +398,7 @@ def __init__( weights_save_path (str): Where to save weights if specified. Example:: + # default used by the Trainer trainer = Trainer(weights_save_path=os.getcwd()) @@ -383,6 +416,7 @@ def __init__( amp_level (str): The optimization level to use (O1, O2, etc...). Check nvidia docs for level (https://nvidia.github.io/apex/amp.html#opt-levels) Example:: + # default used by the Trainer trainer = Trainer(amp_level='O1') @@ -390,6 +424,7 @@ def __init__( This catches any bugs in your validation without having to wait for the first validation check. The Trainer uses 5 steps by default. Turn it off or modify it here. Example:: + # default used by the Trainer trainer = Trainer(num_sanity_val_steps=5) @@ -407,6 +442,7 @@ def __init__( recurrent network trajectories." `_) Example:: + # default used by the Trainer (ie: disabled) trainer = Trainer(truncated_bptt_steps=None) @@ -415,15 +451,18 @@ def __init__( resume_from_checkpoint (str): To resume training from a specific checkpoint pass in the path here.k Example:: + # default used by the Trainer trainer = Trainer(resume_from_checkpoint=None) # resume from a specific checkpoint trainer = Trainer(resume_from_checkpoint='some/path/to/my_checkpoint.ckpt') + + .. warning:: Following arguments become deprecated and they will be removed in v0.8.0: + + - `nb_sanity_val_steps` + """ - # - # .. warning:: Following arguments become deprecated and they will be removed in v0.8.0: - # - `nb_sanity_val_steps` # Transfer params # Backward compatibility @@ -660,8 +699,10 @@ def training_tqdm_dict(self): def tng_tqdm_dic(self): """Read-only for tqdm metrics. - .. warning:: Deprecated in v0.5.0. use training_tqdm_dict instead. - :return: + :return: dictionary + + .. deprecated:: 0.5.0 + Use `training_tqdm_dict` instead. Will remove 0.8.0. """ warnings.warn("`tng_tqdm_dic` has renamed to `training_tqdm_dict` since v0.5.0" " and will be removed in v0.8.0", DeprecationWarning) @@ -718,7 +759,6 @@ def fit(self, model): return 1 def init_optimizers(self, optimizers): - # single optimizer if isinstance(optimizers, Optimizer): return [optimizers], []
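
For reference, the branch-mapping logic added to `docs/source/conf.py` above can be read in isolation. The sketch below is illustrative only — the function name `map_rtd_version_to_branch` and the sample path are not from the patch — but it exercises the same rewrite: a Read the Docs version prefix such as `latest` or `stable` is mapped to `master` before the GitHub `blob` URL is assembled.

```python
def map_rtd_version_to_branch(filename: str) -> str:
    """Rewrite the leading path component ('latest', 'stable', ...) to a real git branch."""
    branch = filename.split('/')[0]
    # Read the Docs builds 'latest' and 'stable' from master, so point source links there;
    # any other prefix (e.g. a tag or branch name) passes through unchanged.
    branch = {'latest': 'master', 'stable': 'master'}.get(branch, branch)
    return '/'.join([branch] + filename.split('/')[1:])


print(map_rtd_version_to_branch('latest/pytorch_lightning/trainer/trainer.py'))
# -> 'master/pytorch_lightning/trainer/trainer.py'
```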