Skip to content

Commit

Permalink
FIX #116 - Pass filepath in MlflowAbstractModelDataSet through PurePo…
Browse files Browse the repository at this point in the history
…sixPath in init
  • Loading branch information
kaemo committed Nov 13, 2020
1 parent f4f4128 commit 6348014
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 15 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
exclude: ^kedro_mlflow/template/project/run.py$
repos:
- repo: https://github.com/psf/black
rev: 19.10b0
rev: 20.8b1
hooks:
- id: black
language_version: python3.7
Expand All @@ -10,11 +10,11 @@ repos:
hooks:
- id: isort
- repo: https://gitlab.com/pycqa/flake8
rev: 3.7.9
rev: 3.8.4
hooks:
- id: flake8
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.5.0
rev: v3.3.0
hooks:
- id: check-merge-conflict
- id: debug-statements
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased]

### Fixed

- Fix `TypeError: unsupported operand type(s) for /: 'str' and 'str'` when using `MlflowArtifactDataSet` with `MlflowModelSaverDataSet` ([#116](https://github.com/Galileo-Galilei/kedro-mlflow/issues/116))

## [0.4.0] - 2020-11-03

### Added
Expand Down
15 changes: 15 additions & 0 deletions docs/source/03_tutorial/06_version_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,18 @@ my_custom_model:
flavor: my_package.custom_mlflow_flavor
pyfunc_workflow: python_model # or loader_module
```

### How can I save a model locally and log it in MLflow in one step?

If you want to save your model both locally and remotely within the same run, you can leverage `MlflowArtifactDataSet`:

```yaml
sklearn_model:
type: kedro_mlflow.io.artifacts.MlflowArtifactDataSet
data_set:
type: kedro_mlflow.io.models.MlflowModelSaverDataSet
flavor: mlflow.sklearn
filepath: data/06_models/sklearn_model
```

This might be useful if you want to always read the latest model saved locally and log it to MLflow each time a new model is trained, for tracking purposes.
14 changes: 7 additions & 7 deletions kedro_mlflow/io/models/mlflow_abstract_model_dataset.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import importlib
from pathlib import PurePosixPath
from typing import Any, Dict, Optional

from kedro.io import AbstractVersionedDataSet, Version
from kedro.io.core import DataSetError
from kedro.io.core import DataSetError, get_protocol_and_path


class MlflowAbstractModelDataSet(AbstractVersionedDataSet):
Expand All @@ -12,7 +13,7 @@ class MlflowAbstractModelDataSet(AbstractVersionedDataSet):

def __init__(
self,
filepath,
filepath: str,
flavor: str,
pyfunc_workflow: Optional[str] = None,
load_args: Dict[str, Any] = None,
Expand All @@ -27,23 +28,22 @@ def __init__(
During load, the model is pulled from MLflow run with `run_id`.
Args:
filepath (str): Path to store the dataset locally.
flavor (str): Built-in or custom MLflow model flavor module.
Must be Python-importable.
filepath (str): Path to store the dataset locally.
run_id (Optional[str], optional): MLflow run ID to use to load
the model from or save the model to. If provided,
takes precedence over filepath. Defaults to None.
pyfunc_workflow (str, optional): Either `python_model` or `loader_module`.
See https://www.mlflow.org/docs/latest/python_api/mlflow.pyfunc.html#workflows.
load_args (Dict[str, Any], optional): Arguments to `load_model`
function from specified `flavor`. Defaults to {}.
save_args (Dict[str, Any], optional): Arguments to `log_model`
function from specified `flavor`. Defaults to {}.
version (Version, optional): Specific version to load.
Raises:
DataSetError: When passed `flavor` does not exist.
"""
super().__init__(filepath, version)
_, path = get_protocol_and_path(filepath, version)
super().__init__(PurePosixPath(path), version)
self._flavor = flavor
self._pyfunc_workflow = pyfunc_workflow

Expand Down
2 changes: 1 addition & 1 deletion kedro_mlflow/io/models/mlflow_model_saver_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def __init__(
Parameters are passed from the Data Catalog.
During save, the model is saved locally at `filepat`
During save, the model is saved locally at `filepath`
During load, the model is loaded from the local `filepath`.
Args:
Expand Down
11 changes: 7 additions & 4 deletions tests/io/models/test_mlflow_model_saver_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,14 +91,15 @@ def kedro_pipeline_model(tmp_path, pipeline_ml_obj, dummy_catalog):


def test_save_unversioned_under_same_path(
linreg_path, linreg_model,
linreg_path,
linreg_model,
):
model_config = {
"name": "linreg",
"config": {
"type": "kedro_mlflow.io.models.MlflowModelSaverDataSet",
"flavor": "mlflow.sklearn",
"filepath": linreg_path,
"filepath": linreg_path.as_posix(),
},
}
mlflow_model_ds = MlflowModelSaverDataSet.from_config(**model_config)
Expand All @@ -114,7 +115,7 @@ def test_save_load_local(linreg_path, linreg_model, versioned):
"name": "linreg",
"config": {
"type": "kedro_mlflow.io.models.MlflowModelSaverDataSet",
"filepath": linreg_path,
"filepath": linreg_path.as_posix(),
"flavor": "mlflow.sklearn",
"versioned": versioned,
},
Expand Down Expand Up @@ -143,7 +144,9 @@ def test_pyfunc_flavor_python_model_save_and_load(
"name": "kedro_pipeline_model",
"config": {
"type": "kedro_mlflow.io.models.MlflowModelSaverDataSet",
"filepath": tmp_path / "data" / "06_models" / "my_custom_model",
"filepath": (
tmp_path / "data" / "06_models" / "my_custom_model"
).as_posix(),
"flavor": "mlflow.pyfunc",
"pyfunc_workflow": "python_model",
"save_args": {"artifacts": artifacts, "conda_env": {"python": "3.7.0"}},
Expand Down

0 comments on commit 6348014

Please sign in to comment.