support more lr scheduler and add more tests
senwu committed Dec 3, 2019
1 parent 3039e60 commit 56b48fe
Showing 16 changed files with 686 additions and 109 deletions.
49 changes: 39 additions & 10 deletions src/emmental/emmental-default-config.yaml
@@ -1,8 +1,8 @@
# Meta configuration
meta_config:
seed: 0 # random seed for all numpy/torch/cuda operations in model and learning
seed: # random seed for all numpy/torch/cuda operations in model and learning
verbose: True # whether to print the log information
log_path: # log directory
log_path: logs # log directory

# Data configuration
data_config:
@@ -19,9 +19,12 @@ model_config:
learner_config:
fp16: False # whether to use half precision
n_epochs: 1 # total number of learning epochs
train_split: train # the split for training, accepts str or list of strs
valid_split: valid # the split for validation, accepts str or list of strs
test_split: test # the split for testing, accepts str or list of strs
train_split: # the split for training, accepts str or list of strs
- train
valid_split: # the split for validation, accepts str or list of strs
- valid
test_split: # the split for testing, accepts str or list of strs
- test
ignore_index: # the ignore index, used for masking samples
optimizer_config:
optimizer: adam # [sgd, adam, adamax, bert_adam]
@@ -83,14 +86,17 @@ learner_config:
warmup_unit: batch # [epoch, batch]
warmup_percentage: # warm up percentage
min_lr: 0.0 # minimum learning rate
linear_config:
min_lr: 0.0
exponential_config:
gamma: 0.9
plateau_config:
factor: 0.5
metric: model/train/all/loss
mode: min
factor: 0.1
patience: 10
threshold: 0.0001
threshold_mode: rel
cooldown: 0
eps: 0.00000001
step_config:
step_size: 1
gamma: 0.1
@@ -100,6 +106,29 @@ learner_config:
- 1000
gamma: 0.1
last_epoch: -1
cyclic_config:
base_lr: 0.001
max_lr: 0.1
step_size_up: 2000
step_size_down:
mode: triangular
gamma: 1.0
scale_fn:
scale_mode: cycle
cycle_momentum: True
base_momentum: 0.8
max_momentum: 0.9
last_epoch: -1
one_cycle_config:
max_lr: 0.1
pct_start: 0.3
anneal_strategy: cos
cycle_momentum: True
base_momentum: 0.85
max_momentum: 0.95
div_factor: 25.0
final_div_factor: 10000.0
last_epoch: -1
cosine_annealing_config:
last_epoch: -1
task_scheduler_config:
@@ -123,8 +152,8 @@ logging_config:
checkpointer_config:
checkpoint_path:
checkpoint_freq: 1
checkpoint_metric: # metric_name: mode, where mode in [min, max]
# model/train/all/loss: min
checkpoint_metric:
model/train/all/loss: min # metric_name: mode, where mode in [min, max]
checkpoint_task_metrics: # task_metric_name: mode
checkpoint_runway: 0 # checkpointing runway (no checkpointing before k unit)
clear_intermediate_checkpoints: True # whether to clear intermediate checkpoints
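The new cyclic_config, one_cycle_config, and cosine_annealing_config blocks mirror the keyword arguments of the corresponding PyTorch schedulers. Below is a minimal standalone sketch (plain PyTorch rather than Emmental's learner; the toy model, optimizer, and the step count of 1000 are illustrative assumptions) of how these defaults map onto torch.optim.lr_scheduler constructors:

```python
import torch
from torch import optim

# Toy model and optimizer just to have parameter groups to schedule (illustrative only).
model = torch.nn.Linear(10, 2)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# In practice each scheduler would be used on its own; they are shown together here
# only to illustrate how the YAML defaults translate into constructor arguments.

# cyclic_config defaults -> CyclicLR (cycle_momentum=True needs a momentum-based optimizer).
cyclic = optim.lr_scheduler.CyclicLR(
    optimizer, base_lr=0.001, max_lr=0.1, step_size_up=2000, mode="triangular"
)

# one_cycle_config defaults -> OneCycleLR; in the learner total_steps is derived from
# n_batches_per_epoch * n_epochs, so 1000 here is just a placeholder.
one_cycle = optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=0.1, total_steps=1000, pct_start=0.3, anneal_strategy="cos"
)

# cosine_annealing_config -> CosineAnnealingLR with the same derived step count and min_lr.
cosine = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=1000, eta_min=0.0)
```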
81 changes: 60 additions & 21 deletions src/emmental/learner.py
@@ -1,3 +1,4 @@
import copy
import logging
from collections import defaultdict
from typing import Dict, List, Optional, Union
@@ -6,6 +7,7 @@
import torch
import torch.optim as optim
from numpy import ndarray
from torch.optim.lr_scheduler import _LRScheduler

from emmental import Meta
from emmental.data import EmmentalDataLoader
@@ -108,7 +110,17 @@ def _set_lr_scheduler(self, model: EmmentalModel) -> None:
self._set_warmup_scheduler(model)

# Set lr scheduler
# TODO: add more lr scheduler support

lr_scheduler_dict = {
"exponential": optim.lr_scheduler.ExponentialLR,
"plateau": optim.lr_scheduler.ReduceLROnPlateau,
"step": optim.lr_scheduler.StepLR,
"multi_step": optim.lr_scheduler.MultiStepLR,
"cyclic": optim.lr_scheduler.CyclicLR,
"one_cycle": optim.lr_scheduler.OneCycleLR, # type: ignore
"cosine_annealing": optim.lr_scheduler.CosineAnnealingLR,
}

opt = Meta.config["learner_config"]["lr_scheduler_config"]["lr_scheduler"]
lr_scheduler_config = Meta.config["learner_config"]["lr_scheduler_config"]

@@ -124,34 +136,42 @@ def _set_lr_scheduler(self, model: EmmentalModel) -> None:
lr_scheduler = optim.lr_scheduler.LambdaLR(
self.optimizer, linear_decay_func # type: ignore
)
elif opt == "exponential":
lr_scheduler = optim.lr_scheduler.ExponentialLR( # type: ignore
self.optimizer, **lr_scheduler_config["exponential_config"]
elif opt in ["exponential", "step", "multi_step", "cyclic"]:
lr_scheduler = lr_scheduler_dict[opt]( # type: ignore
self.optimizer, **lr_scheduler_config[f"{opt}_config"]
)
elif opt == "step":
lr_scheduler = optim.lr_scheduler.StepLR( # type: ignore
self.optimizer, **lr_scheduler_config["step_config"]
elif opt == "one_cycle":
total_steps = (
self.n_batches_per_epoch * Meta.config["learner_config"]["n_epochs"]
)
elif opt == "multi_step":
lr_scheduler = optim.lr_scheduler.MultiStepLR( # type: ignore
self.optimizer, **lr_scheduler_config["multi_step_config"]
lr_scheduler = lr_scheduler_dict[opt]( # type: ignore
self.optimizer,
total_steps=total_steps,
epochs=Meta.config["learner_config"]["n_epochs"],
steps_per_epoch=self.n_batches_per_epoch,
**lr_scheduler_config[f"{opt}_config"],
)
elif opt == "cosine_annealing":
total_steps = (
self.n_batches_per_epoch * Meta.config["learner_config"]["n_epochs"]
)
lr_scheduler = optim.lr_scheduler.CosineAnnealingLR( # type: ignore
lr_scheduler = lr_scheduler_dict[opt]( # type: ignore
self.optimizer,
total_steps,
eta_min=lr_scheduler_config["min_lr"],
**lr_scheduler_config["cosine_annealing_config"],
**lr_scheduler_config[f"{opt}_config"],
)
# elif opt == "reduce_on_plateau":
# lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
# self.optimizer,
# min_lr=lr_scheduler_config["min_lr"],
# **lr_scheduler_config["plateau_config"],
# )
elif opt == "plateau":
plateau_config = copy.deepcopy(lr_scheduler_config["plateau_config"])
del plateau_config["metric"]
lr_scheduler = lr_scheduler_dict[opt](
self.optimizer,
verbose=Meta.config["meta_config"]["verbose"],
min_lr=lr_scheduler_config["min_lr"],
**plateau_config,
)
elif isinstance(opt, _LRScheduler):
lr_scheduler = opt(self.optimizer) # type: ignore
else:
raise ValueError(f"Unrecognized lr scheduler option '{opt}'")
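One detail worth noting in the plateau branch above: ReduceLROnPlateau does not accept a metric argument, so the learner deep-copies plateau_config and drops the metric key before forwarding the rest as keyword arguments. A minimal sketch of that pattern in isolation (the config dict below is hand-written to mirror the YAML defaults rather than read from Meta):

```python
import copy

import torch
from torch import optim

model = torch.nn.Linear(10, 2)  # toy model, illustrative only
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Mirrors plateau_config from the default YAML; "metric" is consumed by the learner
# itself (it names the value later passed to step()), not by PyTorch.
plateau_config = {
    "metric": "model/train/all/loss",
    "mode": "min",
    "factor": 0.1,
    "patience": 10,
    "threshold": 0.0001,
    "threshold_mode": "rel",
    "cooldown": 0,
    "eps": 1e-8,
}

kwargs = copy.deepcopy(plateau_config)
del kwargs["metric"]  # ReduceLROnPlateau has no "metric" parameter
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, min_lr=0.0, **kwargs)
```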

@@ -218,7 +238,9 @@ def _set_warmup_scheduler(self, model: EmmentalModel) -> None:

self.warmup_scheduler = warmup_scheduler

def _update_lr_scheduler(self, model: EmmentalModel, step: int) -> None:
def _update_lr_scheduler(
self, model: EmmentalModel, step: int, metric_dict: Dict[str, float]
) -> None:
r"""Update the lr using lr_scheduler with each batch.
Args:
@@ -237,7 +259,24 @@ def _update_lr_scheduler(self, model: EmmentalModel, step: int) -> None:
)

if (step + 1) % lr_step_cnt == 0:
self.lr_scheduler.step() # type: ignore
if (
Meta.config["learner_config"]["lr_scheduler_config"]["lr_scheduler"]
!= "plateau"
):
self.lr_scheduler.step() # type: ignore
elif (
Meta.config["learner_config"]["lr_scheduler_config"][
"plateau_config"
]["metric"]
in metric_dict
):
self.lr_scheduler.step(
metric_dict[ # type: ignore
Meta.config["learner_config"]["lr_scheduler_config"][
"plateau_config"
]["metric"]
]
)

min_lr = Meta.config["learner_config"]["lr_scheduler_config"]["min_lr"]
if min_lr and self.optimizer.param_groups[0]["lr"] < min_lr:
@@ -544,6 +583,6 @@ def learn(
batches.set_postfix(self.metrics)

# Update lr using lr scheduler
self._update_lr_scheduler(model, total_batch_num)
self._update_lr_scheduler(model, total_batch_num, self.metrics)

model = self.logging_manager.close(model)
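
With the plateau scheduler, _update_lr_scheduler now has to pass the monitored value into step(), and it skips the step entirely when that metric is missing from metric_dict; every other scheduler keeps the argument-free step(). A standalone sketch of that stepping contract (the toy model, optimizer, loop, and metric value are illustrative; the real metric_dict comes from Emmental's logging manager):

```python
import torch
from torch import optim

model = torch.nn.Linear(10, 2)  # toy setup, illustrative only
optimizer = optim.Adam(model.parameters(), lr=0.01)

lr_scheduler_name = "plateau"  # mirrors learner_config.lr_scheduler_config.lr_scheduler
if lr_scheduler_name == "plateau":
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", patience=2)
else:
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

metric_dict = {"model/train/all/loss": 0.42}  # toy value; Emmental fills this from its loggers

for step in range(10):
    optimizer.step()
    if lr_scheduler_name != "plateau":
        lr_scheduler.step()  # fixed-cadence schedulers take no arguments
    elif "model/train/all/loss" in metric_dict:
        # ReduceLROnPlateau needs the monitored value and is simply skipped
        # when the metric has not been computed for this step.
        lr_scheduler.step(metric_dict["model/train/all/loss"])
```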
