Vision: Added exception if invalid kwargs specified (open-mmlab#1251)
* Vision: Error if invalid kwargs specified

* Fixed vision unit tests

* Fixed vision docs

* Fix vision HPO
Innixma committed Jul 27, 2021
1 parent dab2511 commit a9bb0e1
Showing 9 changed files with 46 additions and 64 deletions.
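The net effect of the commit: `ImagePredictor.fit` and `ObjectDetector.fit` now raise a `KeyError` for unrecognized keyword arguments instead of silently ignoring them, and HPO options such as the searcher and trial count move into `hyperparameter_tune_kwargs`. A minimal sketch of the new behavior, inferred from the `_validate_kwargs` implementations in the diff below (the dataset is only illustrative):

```python
from autogluon.vision import ImageDataset, ImagePredictor

# Illustrative dataset; any valid training data behaves the same way.
train_data, _, _ = ImageDataset.from_folders(
    'https://autogluon.s3.amazonaws.com/datasets/shopee-iet.zip')

predictor = ImagePredictor()
try:
    # `search_strategy` is no longer accepted by fit(); _validate_kwargs now
    # rejects it instead of silently ignoring it.
    predictor.fit(train_data, search_strategy='bayesopt')
except KeyError as e:
    print(e)  # Invalid kwargs specified: ['search_strategy']. Valid kwargs names: [...]

# The supported spelling: searcher and trial budget live in hyperparameter_tune_kwargs.
predictor.fit(train_data, hyperparameters={'epochs': 1},
              hyperparameter_tune_kwargs={'searcher': 'bayesopt', 'num_trials': 2})
```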
7 changes: 6 additions & 1 deletion core/src/autogluon/core/scheduler/seq_scheduler.py
@@ -151,7 +151,12 @@ def run(self, **kwargs):
         r = range(self.num_trials)
         for i in (tqdm(r) if self.num_trials < 1000 else r):
             trial_start_time = time.time()
-            is_failed, result = self.run_trial(task_id=i)
+            try:
+                is_failed, result = self.run_trial(task_id=i)
+            except Exception:
+                # TODO: Add special exception type when there are no more new configurations to try (exhausted search space)
+                logger.log(30, f'\tWARNING: Encountered unexpected exception during trial {i}, stopping HPO early.')
+                break
             trial_end_time = time.time()
             trial_run_times.append(np.NaN if is_failed else (trial_end_time - trial_start_time))

6 changes: 3 additions & 3 deletions core/src/autogluon/core/task/base/base_task.py
@@ -53,8 +53,7 @@ def run_fit(cls, train_fn, search_strategy, scheduler_options,
                 plot_results=False):
         start_time = time.time()
         # create scheduler and schedule tasks
-        scheduler = create_scheduler(
-            train_fn, search_strategy, scheduler_options)
+        scheduler = create_scheduler(train_fn, search_strategy, scheduler_options)
         scheduler.run()
         scheduler.join_jobs()
         # gather the best configuration
@@ -64,7 +63,8 @@ def run_fit(cls, train_fn, search_strategy, scheduler_options,
         args.final_fit = True
         if hasattr(args, 'epochs') and hasattr(args, 'final_fit_epochs'):
             args.epochs = args.final_fit_epochs
-        results = scheduler.run_with_config(best_config)
+        scheduler_final = create_scheduler(train_fn, search_strategy, scheduler_options)
+        results = scheduler_final.run_with_config(best_config)
         total_time = time.time() - start_time
         if plot_results or in_ipynb():
             plot_training_curves = scheduler_options['checkpoint'].replace('exp1.ag', 'plot_training_curves.png')
40 changes: 2 additions & 38 deletions docs/tutorials/image_prediction/hpo.md
@@ -75,8 +75,8 @@ parallel evaluations.
 ```{.python .input}
 hyperparameters={'model': model, 'batch_size': batch_size, 'lr': lr, 'epochs': 2}
 predictor = ImagePredictor()
-predictor.fit(train_data, search_strategy='bayesopt', time_limit=60*10, hyperparameters=hyperparameters,
-              hyperparameter_tune_kwargs={'num_trials': 2})
+predictor.fit(train_data, time_limit=60*10, hyperparameters=hyperparameters,
+              hyperparameter_tune_kwargs={'searcher': 'bayesopt', 'num_trials': 2})
 print('Top-1 val acc: %.3f' % predictor.fit_summary()['valid_acc'])
 ```
 
@@ -90,39 +90,3 @@ print('Test acc on hold-out data:', top1)
 
 Note that `num_trials=2` above is only used to speed up the tutorial. In normal
 practice, it is common to only use `time_limit` and drop `num_trials`.
-
-### Hyperband Early Stopping
-
-AutoGluon currently supports scheduling trials in serial order and with early
-stopping (e.g., if the performance of the model early within training already
-looks bad, the trial may be terminated early to free up resources).
-Here is an example of using an early stopping scheduler
-:class:`autogluon.core.scheduler.HyperbandScheduler`. `scheduler_options` is used
-to configure the scheduler. In this example, we run Hyperband with a single
-bracket, and stop/go decisions are made after 1 and 2 epochs (`grace_period`,
-`grace_period * reduction_factor`):
-
-```{.python .input}
-hyperparameters.update({
-    'search_strategy': 'hyperband',
-    'grace_period': 1
-})
-```
-
-The `fit`, `evaluate` and `predict` processes are exactly the same, so we will skip training to save some time.
-
-### Bayesian Optimization and Hyperband ###
-
-While Hyperband scheduling is normally driven by a random searcher, AutoGluon
-also provides Hyperband together with Bayesian optimization. The tuning of expensive
-DL models typically works best with this combination.
-
-```{.python .input}
-hyperparameters.update({
-    'search_strategy': 'bayesopt_hyperband',
-    'grace_period': 1
-})
-```
-
-For a comparison of different search algorithms and scheduling strategies, see :ref:`course_alg`.
-For more options using `fit`, see :class:`autogluon.vision.ImagePredictor`.
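As an aside on the retained advice above (prefer `time_limit` and drop `num_trials`), such a call might look like the following sketch, reusing the tutorial's `train_data` and `hyperparameters`; `num_trials` is simply omitted from `hyperparameter_tune_kwargs`:

```python
# Sketch: without `num_trials`, the wall-clock budget alone bounds the search,
# so HPO keeps proposing configurations until `time_limit` is exhausted.
predictor = ImagePredictor()
predictor.fit(train_data, time_limit=60*10, hyperparameters=hyperparameters,
              hyperparameter_tune_kwargs={'searcher': 'bayesopt'})
```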
10 changes: 4 additions & 6 deletions docs/tutorials/object_detection/beginner.md
@@ -7,10 +7,9 @@ Object detection is the process of identifying and localizing objects in an imag
 
 Our goal is to detect motorbike in images by [YOLOv3 model](https://pjreddie.com/media/files/papers/YOLOv3.pdf). A tiny dataset is collected from VOC dataset, which only contains the motorbike category. The model pretrained on the COCO dataset is used to fine-tune our small dataset. With the help of AutoGluon, we are able to try many models with different hyperparameters automatically, and return the best one as our final model.
 
-To start, import autogluon.vision and ObjectDetector:
+To start, import ObjectDetector:
 
 ```{.python .input}
-import autogluon.core as ag
 from autogluon.vision import ObjectDetector
 ```
 
@@ -33,16 +32,15 @@ We `fit` a classifier using AutoGluon as follows. In each experiment (one trial
 time_limit = 60*30  # at most 0.5 hour
 detector = ObjectDetector()
 hyperparameters = {'epochs': 5, 'batch_size': 8}
-hyperparamter_tune_kwargs={'num_trials': 2}
-detector.fit(dataset_train, time_limit=time_limit, hyperparameters=hyperparameters, hyperparamter_tune_kwargs=hyperparamter_tune_kwargs)
+hyperparameter_tune_kwargs={'num_trials': 2}
+detector.fit(dataset_train, time_limit=time_limit, hyperparameters=hyperparameters, hyperparameter_tune_kwargs=hyperparameter_tune_kwargs)
 ```
 
 Note that `num_trials=2` above is only used to speed up the tutorial. In normal
 practice, it is common to only use `time_limit` and drop `num_trials`. Also note
 that hyperparameter tuning defaults to random search. Model-based variants, such
-as `search_strategy='bayesopt'` or `search_strategy='bayesopt_hyperband'` can be
-a lot more sample-efficient.
+as `searcher='bayesopt'` in `hyperparameter_tune_kwargs` can be a lot more sample-efficient.
 
 After fitting, AutoGluon automatically returns the best model among all models in the searching space. From the output, we know the best model is the one trained with the second learning rate. To see how well the returned model performed on test dataset, call detector.evaluate().
 
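A sketch of the `searcher='bayesopt'` variant mentioned in the note above, reusing the tutorial's `dataset_train`, `hyperparameters`, and `time_limit`:

```python
# Sketch: Bayesian optimization instead of the default random search; usually
# more sample-efficient when every trial is a full training run.
detector = ObjectDetector()
detector.fit(dataset_train, time_limit=time_limit, hyperparameters=hyperparameters,
             hyperparameter_tune_kwargs={'searcher': 'bayesopt', 'num_trials': 2})
```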
19 changes: 13 additions & 6 deletions vision/src/autogluon/vision/detector/detector.py
@@ -11,7 +11,6 @@
 from autogluon.core.utils import set_logger_verbosity
 from gluoncv.auto.tasks import ObjectDetection as _ObjectDetection
 from ..configs.presets_configs import unpack, _check_gpu_memory_presets
-from ..utils import MXNetErrorCatcher
 
 __all__ = ['ObjectDetector']
 
@@ -203,7 +202,7 @@ def fit(self,
         num_trials : int, default = 1
             The limit of HPO trials that can be performed within `time_limit`. The HPO process will be terminated
             when `num_trials` trials have finished or wall clock `time_limit` is reached, whichever comes first.
-        search_strategy : str, default = 'random'
+        searcher : str, default = 'random'
             Searcher strategy for HPO, 'random' by default.
             Options include: ‘random’ (random search), ‘bayesopt’ (Gaussian process Bayesian optimization),
             ‘grid’ (grid search).
@@ -303,10 +302,9 @@ def fit(self,
         task._logger.propagate = True
         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter("always")
-            with MXNetErrorCatcher() as err:
-                self._detector = task.fit(train_data, tuning_data, 1 - holdout_frac, random_state)
-            if err.exc_value is not None:
-                raise RuntimeError(err.exc_value)
+            # TODO: MXNetErrorCatcher was removed because it didn't return traceback,
+            #  Re-add once it returns full traceback regardless of which exception was caught
+            self._detector = task.fit(train_data, tuning_data, 1 - holdout_frac, random_state)
             self._detector._logger.setLevel(log_level)
             self._detector._logger.propagate = True
         self._fit_summary = task.fit_summary()
@@ -356,6 +354,15 @@ def _validate_data(self, data):
 
     def _validate_kwargs(self, kwargs):
         """validate and initialize default kwargs"""
+
+        valid_kwargs = {'holdout_frac', 'random_state', 'nthreads_per_trial', 'ngpus_per_trial', 'hyperparameter_tune_kwargs'}
+        invalid_kwargs = []
+        for key in kwargs:
+            if key not in valid_kwargs:
+                invalid_kwargs.append(key)
+        if invalid_kwargs:
+            raise KeyError(f'Invalid kwargs specified: {invalid_kwargs}. Valid kwargs names: {list(valid_kwargs)}')
+
         kwargs['holdout_frac'] = kwargs.get('holdout_frac', 0.1)
         if not (0 < kwargs['holdout_frac'] < 1.0):
             raise ValueError(f'Range error for `holdout_frac`, expected to be within range (0, 1), given {kwargs["holdout_frac"]}')
20 changes: 14 additions & 6 deletions vision/src/autogluon/vision/predictor/predictor.py
@@ -16,7 +16,7 @@
 from autogluon.core.utils import verbosity2loglevel, get_gpu_count
 from autogluon.core.utils.utils import generate_train_test_split
 from ..configs.presets_configs import unpack, _check_gpu_memory_presets
-from ..utils import MXNetErrorCatcher, sanitize_batch_size
+from ..utils import sanitize_batch_size
 
 __all__ = ['ImagePredictor']
 
@@ -220,7 +220,7 @@ def fit(self,
         num_trials : int, default = 1
             The limit of HPO trials that can be performed within `time_limit`. The HPO process will be terminated
             when `num_trials` trials have finished or wall clock `time_limit` is reached, whichever comes first.
-        search_strategy : str, default = 'random'
+        searcher : str, default = 'random'
             Searcher strategy for HPO, 'random' by default.
             Options include: ‘random’ (random search), ‘bayesopt’ (Gaussian process Bayesian optimization),
             ‘grid’ (grid search).
@@ -409,10 +409,9 @@ def fit(self,
         self._train_classes = train_data.classes
         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter("always")
-            with MXNetErrorCatcher() as err:
-                self._classifier = task.fit(train_data, tuning_data, 1 - holdout_frac, random_state)
-            if err.exc_value is not None:
-                raise RuntimeError(err.exc_value + err.hint)
+            # TODO: MXNetErrorCatcher was removed because it didn't return traceback
+            #  Re-add once it returns full traceback regardless of which exception was caught
+            self._classifier = task.fit(train_data, tuning_data, 1 - holdout_frac, random_state)
             self._classifier._logger.setLevel(log_level)
             self._classifier._logger.propagate = True
         self._fit_summary = task.fit_summary()
@@ -475,6 +474,15 @@ def _validate_data(self, data):
 
     def _validate_kwargs(self, kwargs):
         """validate and initialize default kwargs"""
+
+        valid_kwargs = {'holdout_frac', 'random_state', 'nthreads_per_trial', 'ngpus_per_trial', 'hyperparameter_tune_kwargs'}
+        invalid_kwargs = []
+        for key in kwargs:
+            if key not in valid_kwargs:
+                invalid_kwargs.append(key)
+        if invalid_kwargs:
+            raise KeyError(f'Invalid kwargs specified: {invalid_kwargs}. Valid kwargs names: {list(valid_kwargs)}')
+
         kwargs['holdout_frac'] = kwargs.get('holdout_frac', 0.1)
         if not (0 < kwargs['holdout_frac'] < 1.0):
             raise ValueError(f'Range error for `holdout_frac`, expected to be within range (0, 1), given {kwargs["holdout_frac"]}')
4 changes: 2 additions & 2 deletions vision/tests/unittests/test_image_classification.py
@@ -10,7 +10,7 @@ def test_task():
     dataset, _, test_dataset = ImageDataset.from_folders('https://autogluon.s3.amazonaws.com/datasets/shopee-iet.zip')
     model_list = ImagePredictor.list_models()
     classifier = ImagePredictor()
-    classifier.fit(dataset, num_trials=2, hyperparameters={'epochs': 1, 'early_stop_patience': 3})
+    classifier.fit(dataset, hyperparameters={'epochs': 1, 'early_stop_patience': 3}, hyperparameter_tune_kwargs={'num_trials': 2})
     assert classifier.fit_summary()['valid_acc'] > 0.1, 'valid_acc is abnormal'
     test_result = classifier.predict(test_dataset)
     single_test = classifier.predict(test_dataset.iloc[0]['image'])
@@ -69,4 +69,4 @@ def test_image_predictor_presets():
     train_dataset, _, test_dataset = ImageDataset.from_folders('https://autogluon.s3.amazonaws.com/datasets/shopee-iet.zip')
     for preset in ['medium_quality_faster_train', 'medium_quality_faster_inference']:
         predictor = ImagePredictor()
-        predictor.fit(train_dataset,tuning_data=test_dataset, presets=[preset], time_limit=60, hyperparameters={'epochs':1})
+        predictor.fit(train_dataset, tuning_data=test_dataset, presets=[preset], time_limit=60, hyperparameters={'epochs': 1})
2 changes: 1 addition & 1 deletion vision/tests/unittests/test_image_regression.py
@@ -9,7 +9,7 @@ def test_task():
     dataset, _, test_dataset = Task.Dataset.from_folders('https://autogluon.s3.amazonaws.com/datasets/shopee-iet.zip')
     model_list = Task.list_models()
    predictor = Task(problem_type='regression')
-    predictor.fit(dataset, num_trials=2, hyperparameters={'epochs': 3, 'batch_size': 8})
+    predictor.fit(dataset, hyperparameters={'epochs': 3, 'batch_size': 8}, hyperparameter_tune_kwargs={'num_trials': 2})
     test_result = predictor.predict(test_dataset)
     single_test = predictor.predict(test_dataset.iloc[0]['image'])
     predictor.save('regressor.ag')
2 changes: 1 addition & 1 deletion vision/tests/unittests/test_object_detection.py
@@ -5,7 +5,7 @@ def test_task():
     train_data, _, test_data = dataset.random_split()
 
     detector = Task()
-    detector.fit(train_data, num_trials=1, hyperparameters={'batch_size': 4, 'epochs': 5, 'early_stop_max_value': 0.2})
+    detector.fit(train_data, hyperparameters={'batch_size': 4, 'epochs': 5, 'early_stop_max_value': 0.2}, hyperparameter_tune_kwargs={'num_trials': 1})
     test_result = detector.predict(test_data)
     detector.save('detector.ag')
     detector2 = Task.load('detector.ag')
