diff --git a/docs/source/reference/integration.rst b/docs/source/reference/integration.rst index 8fbcf43f8e..42bcd128f9 100644 --- a/docs/source/reference/integration.rst +++ b/docs/source/reference/integration.rst @@ -18,15 +18,6 @@ For most of the ML frameworks supported by Optuna, the corresponding Optuna inte For scikit-learn, an integrated :class:`~optuna.integration.OptunaSearchCV` estimator is available that combines scikit-learn BaseEstimator functionality with access to a class-level ``Study`` object. -MLflow ------- - -.. autosummary:: - :toctree: generated/ - :nosignatures: - - optuna.integration.MLflowCallback - Dependencies of each integration -------------------------------- diff --git a/optuna/integration/mlflow.py b/optuna/integration/mlflow.py index b608d7bd93..9f4ae3f2cd 100644 --- a/optuna/integration/mlflow.py +++ b/optuna/integration/mlflow.py @@ -1,324 +1,4 @@ -import functools -import threading -from typing import Any -from typing import Callable -from typing import Dict -from typing import List -from typing import Optional -from typing import Sequence -from typing import Union +from optuna_integration.mlflow import MLflowCallback -import optuna -from optuna._experimental import experimental_class -from optuna._experimental import experimental_func -from optuna._imports import try_import -from optuna.study.study import ObjectiveFuncType - -with try_import() as _imports: - import mlflow - -RUN_ID_ATTRIBUTE_KEY = "mlflow_run_id" - - -@experimental_class("1.4.0") -class MLflowCallback: - """Callback to track Optuna trials with MLflow. - - This callback adds relevant information that is - tracked by Optuna to MLflow. - - Example: - - Add MLflow callback to Optuna optimization. - - .. testsetup:: - - import pathlib - import tempfile - - tempdir = tempfile.mkdtemp() - YOUR_TRACKING_URI = pathlib.Path(tempdir).as_uri() - - .. testcode:: - - import optuna - from optuna.integration.mlflow import MLflowCallback - - - def objective(trial): - x = trial.suggest_float("x", -10, 10) - return (x - 2) ** 2 - - - mlflc = MLflowCallback( - tracking_uri=YOUR_TRACKING_URI, - metric_name="my metric score", - ) - - study = optuna.create_study(study_name="my_study") - study.optimize(objective, n_trials=10, callbacks=[mlflc]) - - .. testcleanup:: - - import shutil - - shutil.rmtree(tempdir) - - Args: - tracking_uri: - The URI of the MLflow tracking server. - - Please refer to `mlflow.set_tracking_uri - `_ - for more details. - metric_name: - Name assigned to optimized metric. In case of multi-objective optimization, - list of names can be passed. Those names will be assigned - to metrics in the order returned by objective function. - If single name is provided, or this argument is left to default value, - it will be broadcasted to each objective with a number suffix in order - returned by objective function e.g. two objectives and default metric name - will be logged as ``value_0`` and ``value_1``. The number of metrics must be - the same as the number of values an objective function returns. - create_experiment: - When :obj:`True`, new MLflow experiment will be created for each optimization run, - named after the Optuna study. Setting this argument to :obj:`False` lets user run - optimization under existing experiment, set via `mlflow.set_experiment - `_, - by passing ``experiment_id`` as one of ``mlflow_kwargs`` or under default MLflow - experiment, when no additional arguments are passed. 
Note that this argument - must be set to :obj:`False` when using Optuna with this callback within - Databricks Notebook. - mlflow_kwargs: - Set of arguments passed when initializing MLflow run. - Please refer to `MLflow API documentation - `_ - for more details. - - .. note:: - ``nest_trials`` argument added in v2.3.0 is a part of ``mlflow_kwargs`` - since v3.0.0. Anyone using ``nest_trials=True`` should migrate to - ``mlflow_kwargs={"nested": True}`` to avoid raising :exc:`TypeError`. - - tag_study_user_attrs: - Flag indicating whether or not to add the study's user attrs - to the mlflow trial as tags. Please note that when this flag is - set, key value pairs in :attr:`~optuna.study.Study.user_attrs` - will supersede existing tags. - tag_trial_user_attrs: - Flag indicating whether or not to add the trial's user attrs - to the mlflow trial as tags. Please note that when both trial and - study user attributes are logged, the latter will supersede the former - in case of a collision. - - - """ - - def __init__( - self, - tracking_uri: Optional[str] = None, - metric_name: Union[str, Sequence[str]] = "value", - create_experiment: bool = True, - mlflow_kwargs: Optional[Dict[str, Any]] = None, - tag_study_user_attrs: bool = False, - tag_trial_user_attrs: bool = True, - ) -> None: - _imports.check() - - if not isinstance(metric_name, Sequence): - raise TypeError( - "Expected metric_name to be string or sequence of strings, got {}.".format( - type(metric_name) - ) - ) - - self._tracking_uri = tracking_uri - self._metric_name = metric_name - self._create_experiment = create_experiment - self._mlflow_kwargs = mlflow_kwargs or {} - self._tag_study_user_attrs = tag_study_user_attrs - self._tag_trial_user_attrs = tag_trial_user_attrs - self._lock = threading.Lock() - - def __call__(self, study: optuna.study.Study, trial: optuna.trial.FrozenTrial) -> None: - with self._lock: - self._initialize_experiment(study) - - with mlflow.start_run( - run_id=trial.system_attrs.get(RUN_ID_ATTRIBUTE_KEY), - experiment_id=self._mlflow_kwargs.get("experiment_id"), - run_name=self._mlflow_kwargs.get("run_name") or str(trial.number), - nested=self._mlflow_kwargs.get("nested") or False, - tags=self._mlflow_kwargs.get("tags"), - ): - # This sets the metrics for MLflow. - self._log_metrics(trial.values) - - # This sets the params for MLflow. - self._log_params(trial.params) - - # This sets the tags for MLflow. - self._set_tags(trial, study) - - @experimental_func("2.9.0") - def track_in_mlflow(self) -> Callable: - """Decorator for using MLflow logging in the objective function. - - This decorator enables the extension of MLflow logging provided by the callback. - - All information logged in the decorated objective function will be added to the MLflow - run for the trial created by the callback. - - Example: - - Add additional logging to MLflow. - - .. testcode:: - - import optuna - import mlflow - from optuna.integration.mlflow import MLflowCallback - - mlflc = MLflowCallback( - tracking_uri=YOUR_TRACKING_URI, - metric_name="my metric score", - ) - - - @mlflc.track_in_mlflow() - def objective(trial): - x = trial.suggest_float("x", -10, 10) - mlflow.log_param("power", 2) - mlflow.log_metric("base of metric", x - 2) - - return (x - 2) ** 2 - - - study = optuna.create_study(study_name="my_other_study") - study.optimize(objective, n_trials=10, callbacks=[mlflc]) - - Returns: - Objective function with tracking to MLflow enabled. 
- """ - - def decorator(func: ObjectiveFuncType) -> ObjectiveFuncType: - @functools.wraps(func) - def wrapper(trial: optuna.trial.Trial) -> Union[float, Sequence[float]]: - with self._lock: - study = trial.study - self._initialize_experiment(study) - nested = self._mlflow_kwargs.get("nested") - - with mlflow.start_run(run_name=str(trial.number), nested=nested) as run: - trial.storage.set_trial_system_attr( - trial._trial_id, RUN_ID_ATTRIBUTE_KEY, run.info.run_id - ) - - return func(trial) - - return wrapper - - return decorator - - def _initialize_experiment(self, study: optuna.study.Study) -> None: - """Initialize an MLflow experiment with the study name. - - If a tracking uri has been provided, MLflow will be initialized to use it. - - Args: - study: Study to be tracked in MLflow. - """ - - # This sets the `tracking_uri` for MLflow. - if self._tracking_uri is not None: - mlflow.set_tracking_uri(self._tracking_uri) - - if self._create_experiment: - mlflow.set_experiment(study.study_name) - - def _set_tags(self, trial: optuna.trial.FrozenTrial, study: optuna.study.Study) -> None: - """Sets the Optuna tags for the current MLflow run. - - Args: - trial: Trial to be tracked. - study: Study to be tracked. - """ - - tags: Dict[str, Union[str, List[str]]] = {} - tags["number"] = str(trial.number) - tags["datetime_start"] = str(trial.datetime_start) - - tags["datetime_complete"] = str(trial.datetime_complete) - - # Set trial state. - if trial.state.is_finished(): - tags["state"] = trial.state.name - - # Set study directions. - directions = [d.name for d in study.directions] - tags["direction"] = directions if len(directions) != 1 else directions[0] - - distributions = {(k + "_distribution"): str(v) for (k, v) in trial.distributions.items()} - tags.update(distributions) - - if self._tag_trial_user_attrs: - tags.update(trial.user_attrs) - - if self._tag_study_user_attrs: - tags.update(study.user_attrs) - - # This is a temporary fix on Optuna side. It avoids an error with user - # attributes that are too long. It should be fixed on MLflow side later. - # When it is fixed on MLflow side this codeblock can be removed. - # see https://github.com/optuna/optuna/issues/1340 - # see https://github.com/mlflow/mlflow/issues/2931 - for key, value in tags.items(): - value = str(value) # make sure it is a string - max_val_length = mlflow.utils.validation.MAX_TAG_VAL_LENGTH - if len(value) > max_val_length: - tags[key] = "{}...".format(value[: max_val_length - 3]) - - mlflow.set_tags(tags) - - def _log_metrics(self, values: Optional[List[float]]) -> None: - """Log the trial results as metrics to MLflow. - - Args: - values: Results of a trial. - """ - if values is None: - return - - if isinstance(self._metric_name, str): - if len(values) > 1: - # Broadcast default name for multi-objective optimization. - names = ["{}_{}".format(self._metric_name, i) for i in range(len(values))] - - else: - names = [self._metric_name] - - else: - if len(self._metric_name) != len(values): - raise ValueError( - "Running multi-objective optimization " - "with {} objective values, but {} names specified. " - "Match objective values and names, or use default broadcasting.".format( - len(values), len(self._metric_name) - ) - ) - - else: - names = [*self._metric_name] - - metrics = {name: val for name, val in zip(names, values)} - mlflow.log_metrics(metrics) - - @staticmethod - def _log_params(params: Dict[str, Any]) -> None: - """Log the parameters of the trial to MLflow. - - Args: - params: Trial params. 
- """ - mlflow.log_params(params) +__all__ = ["MLflowCallback"] diff --git a/pyproject.toml b/pyproject.toml index 14fb520f0f..00bbbf8d2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,6 @@ document = [ "fvcore", "lightgbm", "matplotlib!=3.6.0", - "mlflow", "pandas", "pillow", "plotly>=4.9.0", # optuna/visualization. @@ -82,7 +81,6 @@ document = [ "torchvision", ] integration = [ - "mlflow", "scikit-learn>=0.24.2", "shap", "tensorflow", diff --git a/tests/integration_tests/test_mlflow.py b/tests/integration_tests/test_mlflow.py deleted file mode 100644 index 3f560f10c7..0000000000 --- a/tests/integration_tests/test_mlflow.py +++ /dev/null @@ -1,516 +0,0 @@ -from typing import Callable -from typing import List -from typing import Optional -from typing import Tuple -from typing import Union - -import numpy as np -import py -import pytest - -import optuna -from optuna._imports import try_import -from optuna.integration.mlflow import MLflowCallback - - -with try_import(): - import mlflow - from mlflow.tracking import MlflowClient - from mlflow.utils.mlflow_tags import MLFLOW_PARENT_RUN_ID - -pytestmark = pytest.mark.integration - - -def _objective_func(trial: optuna.trial.Trial) -> float: - x = trial.suggest_float("x", -1.0, 1.0) - y = trial.suggest_float("y", 20, 30, log=True) - z = trial.suggest_categorical("z", (-1.0, 1.0)) - trial.set_user_attr("my_user_attr", "my_user_attr_value") - return (x - 2) ** 2 + (y - 25) ** 2 + z - - -def _multiobjective_func(trial: optuna.trial.Trial) -> Tuple[float, float]: - x = trial.suggest_float("x", low=-1.0, high=1.0) - y = trial.suggest_float("y", low=20, high=30, log=True) - z = trial.suggest_categorical("z", (-1.0, 1.0)) - first_objective = (x - 2) ** 2 + (y - 25) ** 2 + z - second_objective = (x - 2) ** 3 + (y - 25) ** 3 - z - - return first_objective, second_objective - - -# This is tool function for a temporary fix on Optuna side. It avoids an error with user -# attributes that are too long. It should be fixed on MLflow side later. -# When it is fixed on MLflow side this test can be removed. -# see https://github.com/optuna/optuna/issues/1340 -# see https://github.com/mlflow/mlflow/issues/2931 -def _objective_func_long_user_attr(trial: optuna.trial.Trial) -> float: - x = trial.suggest_float("x", -1.0, 1.0) - y = trial.suggest_float("y", 20, 30, log=True) - z = trial.suggest_categorical("z", (-1.0, 1.0)) - long_str = str(list(range(5000))) - trial.set_user_attr("my_user_attr", long_str) - return (x - 2) ** 2 + (y - 25) ** 2 + z - - -@pytest.mark.parametrize("name,expected", [(None, "Default"), ("foo", "foo")]) -def test_use_existing_or_default_experiment( - tmpdir: py.path.local, name: Optional[str], expected: str -) -> None: - if name is not None: - tracking_uri = f"file:{tmpdir}" - mlflow.set_tracking_uri(tracking_uri) - mlflow.set_experiment(name) - - else: - # Target directory can't exist when initializing first - # run with default experiment at non-default uri. - tracking_uri = f"file:{tmpdir}/foo" - mlflow.set_tracking_uri(tracking_uri) - - mlflc = MLflowCallback(tracking_uri=tracking_uri, create_experiment=False) - study = optuna.create_study() - - for _ in range(10): - # Simulate multiple optimization runs under same experiment. 
- study.optimize(_objective_func, n_trials=1, callbacks=[mlflc]) - - mlfl_client = MlflowClient(tracking_uri) - experiment = mlfl_client.search_experiments()[0] - runs = mlfl_client.search_runs(experiment.experiment_id) - - assert experiment.name == expected - assert len(runs) == 10 - - -def test_study_name(tmpdir: py.path.local) -> None: - tracking_uri = f"file:{tmpdir}" - study_name = "my_study" - n_trials = 3 - - mlflc = MLflowCallback(tracking_uri=tracking_uri) - study = optuna.create_study(study_name=study_name) - study.optimize(_objective_func, n_trials=n_trials, callbacks=[mlflc]) - - mlfl_client = MlflowClient(tracking_uri) - assert len(mlfl_client.search_experiments()) == 1 - - experiment = mlfl_client.search_experiments()[0] - runs = mlfl_client.search_runs(experiment.experiment_id) - - assert experiment.name == study_name - assert len(runs) == n_trials - - -def test_use_existing_experiment_by_id(tmpdir: py.path.local) -> None: - tracking_uri = f"file:{tmpdir}" - mlflow.set_tracking_uri(tracking_uri) - experiment_id = mlflow.create_experiment("foo") - - mlflow_kwargs = {"experiment_id": experiment_id} - mlflc = MLflowCallback( - tracking_uri=tracking_uri, create_experiment=False, mlflow_kwargs=mlflow_kwargs - ) - study = optuna.create_study() - - for _ in range(10): - study.optimize(_objective_func, n_trials=1, callbacks=[mlflc]) - - mlfl_client = MlflowClient(tracking_uri) - experiment_list = mlfl_client.search_experiments() - assert len(experiment_list) == 1 - - experiment = experiment_list[0] - assert experiment.experiment_id == experiment_id - assert experiment.name == "foo" - - runs = mlfl_client.search_runs(experiment_id) - assert len(runs) == 10 - - -def test_metric_name(tmpdir: py.path.local) -> None: - tracking_uri = f"file:{tmpdir}" - metric_name = "my_metric_name" - - mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=metric_name) - study = optuna.create_study(study_name="my_study") - study.optimize(_objective_func, n_trials=3, callbacks=[mlflc]) - - mlfl_client = MlflowClient(tracking_uri) - experiments = mlfl_client.search_experiments() - - experiment = experiments[0] - experiment_id = experiment.experiment_id - - first_run = mlfl_client.search_runs(experiment_id)[0] - first_run_dict = first_run.to_dictionary() - - assert metric_name in first_run_dict["data"]["metrics"] - - -@pytest.mark.parametrize( - "names,expected", - [ - ("foo", ["foo_0", "foo_1"]), - (["foo", "bar"], ["foo", "bar"]), - (("foo", "bar"), ["foo", "bar"]), - ], -) -def test_metric_name_multiobjective( - tmpdir: py.path.local, names: Union[str, List[str]], expected: List[str] -) -> None: - tracking_uri = f"file:{tmpdir}" - - mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=names) - study = optuna.create_study(study_name="my_study", directions=["minimize", "maximize"]) - study.optimize(_multiobjective_func, n_trials=3, callbacks=[mlflc]) - - mlfl_client = MlflowClient(tracking_uri) - experiments = mlfl_client.search_experiments() - - experiment = experiments[0] - experiment_id = experiment.experiment_id - - first_run = mlfl_client.search_runs(experiment_id)[0] - first_run_dict = first_run.to_dictionary() - - assert all([e in first_run_dict["data"]["metrics"] for e in expected]) - - -@pytest.mark.parametrize("run_name,expected", [(None, "0"), ("foo", "foo")]) -def test_run_name(tmpdir: py.path.local, run_name: Optional[str], expected: str) -> None: - tracking_uri = f"file:{tmpdir}" - - mlflow_kwargs = {"run_name": run_name} - mlflc = MLflowCallback(tracking_uri=tracking_uri, 
mlflow_kwargs=mlflow_kwargs) - study = optuna.create_study() - study.optimize(_objective_func, n_trials=1, callbacks=[mlflc]) - - mlfl_client = MlflowClient(tracking_uri) - experiment = mlfl_client.search_experiments()[0] - run = mlfl_client.search_runs(experiment.experiment_id)[0] - tags = run.data.tags - assert tags["mlflow.runName"] == expected - - -# This is a test for a temporary fix on Optuna side. It avoids an error with user -# attributes that are too long. It should be fixed on MLflow side later. -# When it is fixed on MLflow side this test can be removed. -# see https://github.com/optuna/optuna/issues/1340 -# see https://github.com/mlflow/mlflow/issues/2931 -def test_tag_truncation(tmpdir: py.path.local) -> None: - tracking_uri = f"file:{tmpdir}" - study_name = "my_study" - n_trials = 3 - - mlflc = MLflowCallback(tracking_uri=tracking_uri) - study = optuna.create_study(study_name=study_name) - study.optimize(_objective_func_long_user_attr, n_trials=n_trials, callbacks=[mlflc]) - - mlfl_client = MlflowClient(tracking_uri) - experiments = mlfl_client.search_experiments() - assert len(experiments) == 1 - - experiment = experiments[0] - assert experiment.name == study_name - experiment_id = experiment.experiment_id - - runs = mlfl_client.search_runs(experiment_id) - assert len(runs) == n_trials - - first_run = runs[0] - first_run_dict = first_run.to_dictionary() - - my_user_attr = first_run_dict["data"]["tags"]["my_user_attr"] - assert len(my_user_attr) <= 5000 - - -def test_nest_trials(tmpdir: py.path.local) -> None: - tracking_uri = f"file:{tmpdir}" - study_name = "my_study" - mlflow.set_tracking_uri(tracking_uri) - mlflow.set_experiment(study_name) - - mlflc = MLflowCallback(tracking_uri=tracking_uri, mlflow_kwargs={"nested": True}) - study = optuna.create_study(study_name=study_name) - - n_trials = 3 - with mlflow.start_run() as parent_run: - study.optimize(_objective_func, n_trials=n_trials, callbacks=[mlflc]) - - mlfl_client = MlflowClient(tracking_uri) - experiments = mlfl_client.search_experiments() - experiment_id = experiments[0].experiment_id - - all_runs = mlfl_client.search_runs([experiment_id]) - child_runs = [r for r in all_runs if MLFLOW_PARENT_RUN_ID in r.data.tags] - - assert len(all_runs) == n_trials + 1 - assert len(child_runs) == n_trials - assert all(r.data.tags[MLFLOW_PARENT_RUN_ID] == parent_run.info.run_id for r in child_runs) - assert all(set(r.data.params.keys()) == {"x", "y", "z"} for r in child_runs) - assert all(set(r.data.metrics.keys()) == {"value"} for r in child_runs) - - -@pytest.mark.parametrize("n_jobs", [2, 4]) -def test_multiple_jobs(tmpdir: py.path.local, n_jobs: int) -> None: - tracking_uri = f"file:{tmpdir}" - study_name = "my_study" - # The race-condition usually happens after first trial for each job. 
- n_trials = n_jobs * 2 - - mlflc = MLflowCallback(tracking_uri=tracking_uri) - study = optuna.create_study(study_name=study_name) - study.optimize(_objective_func, n_trials=n_trials, callbacks=[mlflc], n_jobs=n_jobs) - - mlfl_client = MlflowClient(tracking_uri) - experiments = mlfl_client.search_experiments() - assert len(experiments) == 1 - - experiment_id = experiments[0].experiment_id - runs = mlfl_client.search_runs([experiment_id]) - assert len(runs) == n_trials - - -def test_mlflow_callback_fails_when_nest_trials_is_false_and_active_run_exists( - tmpdir: py.path.local, -) -> None: - tracking_uri = f"file:{tmpdir}" - study_name = "my_study" - mlflow.set_tracking_uri(tracking_uri) - mlflow.set_experiment(study_name) - - mlflc = MLflowCallback(tracking_uri=tracking_uri) - study = optuna.create_study(study_name=study_name) - - with mlflow.start_run(): - with pytest.raises(Exception, match=r"Run with UUID \w+ is already active."): - study.optimize(_objective_func, n_trials=1, callbacks=[mlflc]) - - -def test_tag_always_logged(tmpdir: py.path.local) -> None: - tracking_uri = f"file:{tmpdir}" - study_name = "my_study" - n_trials = 3 - - mlflc = MLflowCallback(tracking_uri=tracking_uri) - study = optuna.create_study(study_name=study_name) - study.optimize(_objective_func, n_trials=n_trials, callbacks=[mlflc]) - - mlfl_client = MlflowClient(tracking_uri) - experiment = mlfl_client.search_experiments()[0] - runs = mlfl_client.search_runs([experiment.experiment_id]) - - assert all((r.data.tags["direction"] == "MINIMIZE") for r in runs) - assert all((r.data.tags["state"] == "COMPLETE") for r in runs) - - -@pytest.mark.parametrize("tag_study_user_attrs", [True, False]) -def test_tag_study_user_attrs(tmpdir: py.path.local, tag_study_user_attrs: bool) -> None: - tracking_uri = f"file:{tmpdir}" - study_name = "my_study" - n_trials = 3 - - mlflc = MLflowCallback(tracking_uri=tracking_uri, tag_study_user_attrs=tag_study_user_attrs) - study = optuna.create_study(study_name=study_name) - study.set_user_attr("my_study_attr", "a") - study.optimize(_objective_func_long_user_attr, n_trials=n_trials, callbacks=[mlflc]) - - mlfl_client = MlflowClient(tracking_uri) - experiments = mlfl_client.search_experiments() - assert len(experiments) == 1 - - experiment = experiments[0] - assert experiment.name == study_name - experiment_id = experiment.experiment_id - - runs = mlfl_client.search_runs([experiment_id]) - assert len(runs) == n_trials - - if tag_study_user_attrs: - assert all((r.data.tags["my_study_attr"] == "a") for r in runs) - else: - assert all(("my_study_attr" not in r.data.tags) for r in runs) - - -@pytest.mark.parametrize("tag_trial_user_attrs", [True, False]) -def test_tag_trial_user_attrs(tmpdir: py.path.local, tag_trial_user_attrs: bool) -> None: - tracking_uri = f"file:{tmpdir}" - study_name = "my_study" - n_trials = 3 - - mlflc = MLflowCallback(tracking_uri=tracking_uri, tag_trial_user_attrs=tag_trial_user_attrs) - study = optuna.create_study(study_name=study_name) - study.optimize(_objective_func, n_trials=n_trials, callbacks=[mlflc]) - - mlfl_client = MlflowClient(tracking_uri) - experiment = mlfl_client.search_experiments()[0] - runs = mlfl_client.search_runs([experiment.experiment_id]) - - if tag_trial_user_attrs: - assert all((r.data.tags["my_user_attr"] == "my_user_attr_value") for r in runs) - else: - assert all(("my_user_attr" not in r.data.tags) for r in runs) - - -def test_log_mlflow_tags(tmpdir: py.path.local) -> None: - tracking_uri = f"file:{tmpdir}" - expected_tags = {"foo": 0, 
"bar": 1} - mlflow_kwargs = {"tags": expected_tags} - - mlflc = MLflowCallback(tracking_uri=tracking_uri, mlflow_kwargs=mlflow_kwargs) - study = optuna.create_study() - study.optimize(_objective_func, n_trials=1, callbacks=[mlflc]) - - mlfl_client = MlflowClient(tracking_uri) - experiment = mlfl_client.search_experiments()[0] - run = mlfl_client.search_runs(experiment.experiment_id)[0] - tags = run.data.tags - - assert all([k in tags.keys() for k in expected_tags.keys()]) - assert all([tags[key] == str(value) for key, value in expected_tags.items()]) - - -@pytest.mark.parametrize("n_jobs", [1, 2, 4]) -def test_track_in_mlflow_decorator(tmpdir: py.path.local, n_jobs: int) -> None: - tracking_uri = f"file:{tmpdir}" - study_name = "my_study" - n_trials = n_jobs * 2 - - metric_name = "additional_metric" - metric = 3.14 - - mlflc = MLflowCallback(tracking_uri=tracking_uri) - - def _objective_func(trial: optuna.trial.Trial) -> float: - """Objective function""" - - x = trial.suggest_float("x", -1.0, 1.0) - y = trial.suggest_float("y", 20, 30, log=True) - z = trial.suggest_categorical("z", (-1.0, 1.0)) - trial.set_user_attr("my_user_attr", "my_user_attr_value") - mlflow.log_metric(metric_name, metric) - return (x - 2) ** 2 + (y - 25) ** 2 + z - - tracked_objective = mlflc.track_in_mlflow()(_objective_func) - - study = optuna.create_study(study_name=study_name) - study.optimize(tracked_objective, n_trials=n_trials, callbacks=[mlflc], n_jobs=n_jobs) - - mlfl_client = MlflowClient(tracking_uri) - experiments = mlfl_client.search_experiments() - assert len(experiments) == 1 - - experiment = experiments[0] - assert experiment.name == study_name - experiment_id = experiment.experiment_id - - runs = mlfl_client.search_runs(experiment_id) - assert len(runs) == n_trials - - first_run = runs[0] - first_run_dict = first_run.to_dictionary() - - assert metric_name in first_run_dict["data"]["metrics"] - assert first_run_dict["data"]["metrics"][metric_name] == metric - - assert tracked_objective.__name__ == _objective_func.__name__ - assert tracked_objective.__doc__ == _objective_func.__doc__ - - -@pytest.mark.parametrize( - "func,names,values", - [ - (_objective_func, ["metric"], [27.0]), - (_multiobjective_func, ["metric1", "metric2"], [27.0, -127.0]), - ], -) -def test_log_metric( - tmpdir: py.path.local, func: Callable, names: List[str], values: List[float] -) -> None: - tracking_uri = f"file:{tmpdir}" - study_name = "my_study" - - mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=names) - study = optuna.create_study( - study_name=study_name, directions=["minimize" for _ in range(len(values))] - ) - study.enqueue_trial({"x": 1.0, "y": 20.0, "z": 1.0}) - study.optimize(func, n_trials=1, callbacks=[mlflc]) - - mlfl_client = MlflowClient(tracking_uri) - experiments = mlfl_client.search_experiments() - experiment = experiments[0] - experiment_id = experiment.experiment_id - - runs = mlfl_client.search_runs(experiment_id) - assert len(runs) == 1 - - run = runs[0] - run_dict = run.to_dictionary() - - assert all(name in run_dict["data"]["metrics"] for name in names) - assert all([run_dict["data"]["metrics"][name] == val for name, val in zip(names, values)]) - - -def test_log_metric_none(tmpdir: py.path.local) -> None: - tracking_uri = f"file:{tmpdir}" - metric_name = "metric" - study_name = "my_study" - - mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=metric_name) - study = optuna.create_study(study_name=study_name) - study.optimize(lambda _: np.nan, n_trials=1, callbacks=[mlflc]) - - 
mlfl_client = MlflowClient(tracking_uri) - experiments = mlfl_client.search_experiments() - experiment = experiments[0] - experiment_id = experiment.experiment_id - - runs = mlfl_client.search_runs(experiment_id) - assert len(runs) == 1 - - run = runs[0] - run_dict = run.to_dictionary() - - # When `values` is `None`, do not save values with metric names. - assert metric_name not in run_dict["data"]["metrics"] - - -def test_log_params(tmpdir: py.path.local) -> None: - tracking_uri = f"file:{tmpdir}" - metric_name = "metric" - study_name = "my_study" - - mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=metric_name) - study = optuna.create_study(study_name=study_name) - study.enqueue_trial({"x": 1.0, "y": 20.0, "z": 1.0}) - study.optimize(_objective_func, n_trials=1, callbacks=[mlflc]) - - mlfl_client = MlflowClient(tracking_uri) - experiments = mlfl_client.search_experiments() - experiment = experiments[0] - experiment_id = experiment.experiment_id - - runs = mlfl_client.search_runs(experiment_id) - assert len(runs) == 1 - - run = runs[0] - run_dict = run.to_dictionary() - - for param_name, param_value in study.best_params.items(): - assert param_name in run_dict["data"]["params"] - assert run_dict["data"]["params"][param_name] == str(param_value) - assert run_dict["data"]["tags"][f"{param_name}_distribution"] == str( - study.best_trial.distributions[param_name] - ) - - -@pytest.mark.parametrize("metrics", [["foo"], ["foo", "bar", "baz"]]) -def test_multiobjective_raises_on_name_mismatch(tmpdir: py.path.local, metrics: List[str]) -> None: - tracking_uri = f"file:{tmpdir}" - mlflc = MLflowCallback(tracking_uri=tracking_uri, metric_name=metrics) - study = optuna.create_study(study_name="my_study", directions=["minimize", "maximize"]) - - with pytest.raises(ValueError): - study.optimize(_multiobjective_func, n_trials=1, callbacks=[mlflc]) diff --git a/tutorial/20_recipes/007_optuna_callback.py b/tutorial/20_recipes/007_optuna_callback.py index c193582bad..dbf6441c57 100644 --- a/tutorial/20_recipes/007_optuna_callback.py +++ b/tutorial/20_recipes/007_optuna_callback.py @@ -9,7 +9,7 @@ ``Callback`` is called after every evaluation of ``objective``, and it takes :class:`~optuna.study.Study` and :class:`~optuna.trial.FrozenTrial` as arguments, and does some work. -:class:`~optuna.integration.MLflowCallback` is a great example. +`MLflowCallback `_ is a great example. """
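
Note on usage after this change: the MLflow integration now lives in the separate ``optuna-integration`` package, and ``optuna.integration.mlflow`` simply re-exports ``MLflowCallback`` from ``optuna_integration.mlflow``. The sketch below is adapted from the docstring example removed above and shows that callback usage itself is unchanged; it assumes ``mlflow`` and the ``optuna-integration`` package are installed, and uses a local file-based tracking URI purely for illustration.

```python
import pathlib
import tempfile

import optuna
from optuna.integration.mlflow import MLflowCallback  # re-exported from optuna_integration.mlflow


def objective(trial):
    x = trial.suggest_float("x", -10, 10)
    return (x - 2) ** 2


# A local file-based tracking URI, as in the removed doctest setup; point this
# at your real MLflow tracking server in practice.
tracking_uri = pathlib.Path(tempfile.mkdtemp()).as_uri()

mlflc = MLflowCallback(
    tracking_uri=tracking_uri,
    metric_name="my metric score",
)

study = optuna.create_study(study_name="my_study")
study.optimize(objective, n_trials=10, callbacks=[mlflc])
```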