From 8727237cab7d2f2605742d96bfeb09e751069c7e Mon Sep 17 00:00:00 2001 From: Chen Qian Date: Mon, 11 Mar 2024 11:22:00 -0700 Subject: [PATCH] Fix misuse of private method in mlflow.data module (#11369) Signed-off-by: chenmoneygithub --- mlflow/data/artifact_dataset_sources.py | 6 +++--- mlflow/data/code_dataset_source.py | 4 ++-- mlflow/data/dataset_source.py | 8 ++++---- mlflow/data/delta_dataset_source.py | 4 ++-- mlflow/data/filesystem_dataset_source.py | 4 ++-- mlflow/data/http_dataset_source.py | 4 ++-- mlflow/data/huggingface_dataset_source.py | 4 ++-- mlflow/data/spark_dataset_source.py | 4 ++-- mlflow/data/uc_volume_dataset_source.py | 4 ++-- tests/data/test_code_dataset_source.py | 2 +- tests/resources/data/dataset_source.py | 4 ++-- 11 files changed, 24 insertions(+), 24 deletions(-) diff --git a/mlflow/data/artifact_dataset_sources.py b/mlflow/data/artifact_dataset_sources.py index 70c68c0f19652..8437e7b6b9d1a 100644 --- a/mlflow/data/artifact_dataset_sources.py +++ b/mlflow/data/artifact_dataset_sources.py @@ -140,7 +140,7 @@ def _can_resolve(raw_source: Any): def _resolve(cls, raw_source: Any) -> DatasetForArtifactRepoSourceType: return cls(str(raw_source)) - def _to_dict(self) -> Dict[Any, Any]: + def to_dict(self) -> Dict[Any, Any]: """ Returns: A JSON-compatible dictionary representation of the {dataset_source_name}. @@ -150,7 +150,7 @@ def _to_dict(self) -> Dict[Any, Any]: } @classmethod - def _from_dict(cls, source_dict: Dict[Any, Any]) -> DatasetForArtifactRepoSourceType: + def from_dict(cls, source_dict: Dict[Any, Any]) -> DatasetForArtifactRepoSourceType: uri = source_dict.get("uri") if uri is None: raise MlflowException( @@ -163,7 +163,7 @@ def _from_dict(cls, source_dict: Dict[Any, Any]) -> DatasetForArtifactRepoSource ArtifactRepoSource.__name__ = dataset_source_name ArtifactRepoSource.__qualname__ = dataset_source_name ArtifactRepoSource.__doc__ = class_docstring - ArtifactRepoSource._to_dict.__doc__ = ArtifactRepoSource._to_dict.__doc__.format( + ArtifactRepoSource.to_dict.__doc__ = ArtifactRepoSource.to_dict.__doc__.format( dataset_source_name=dataset_source_name ) ArtifactRepoSource.uri.__doc__ = ArtifactRepoSource.uri.__doc__.format(scheme=scheme) diff --git a/mlflow/data/code_dataset_source.py b/mlflow/data/code_dataset_source.py index fa027397d50f5..69577e0c462e1 100644 --- a/mlflow/data/code_dataset_source.py +++ b/mlflow/data/code_dataset_source.py @@ -28,11 +28,11 @@ def _can_resolve(raw_source: Any): def _resolve(cls, raw_source: str) -> "CodeDatasetSource": raise NotImplementedError - def _to_dict(self) -> Dict[Any, Any]: + def to_dict(self) -> Dict[Any, Any]: return {"tags": self._tags} @classmethod - def _from_dict(cls, source_dict: Dict[Any, Any]) -> "CodeDatasetSource": + def from_dict(cls, source_dict: Dict[Any, Any]) -> "CodeDatasetSource": return cls( tags=source_dict.get("tags"), ) diff --git a/mlflow/data/dataset_source.py b/mlflow/data/dataset_source.py index 64ada80946cc8..e45ae2cc1ac0a 100644 --- a/mlflow/data/dataset_source.py +++ b/mlflow/data/dataset_source.py @@ -64,7 +64,7 @@ def _resolve(cls, raw_source: Any) -> "DatasetSource": """ @abstractmethod - def _to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> Dict[str, Any]: """Obtains a JSON-compatible dictionary representation of the DatasetSource. Returns: @@ -81,11 +81,11 @@ def to_json(self) -> str: A JSON string representation of the :py:class:`DatasetSource `. """ - return json.dumps(self._to_dict()) + return json.dumps(self.to_dict()) @classmethod @abstractmethod - def _from_dict(cls, source_dict: Dict[Any, Any]) -> "DatasetSource": + def from_dict(cls, source_dict: Dict[Any, Any]) -> "DatasetSource": """Constructs an instance of the DatasetSource from a dictionary representation. Args: @@ -107,4 +107,4 @@ def from_json(cls, source_json: str) -> "DatasetSource": A DatasetSource instance. """ - return cls._from_dict(json.loads(source_json)) + return cls.from_dict(json.loads(source_json)) diff --git a/mlflow/data/delta_dataset_source.py b/mlflow/data/delta_dataset_source.py index c79662c544ee5..ef61048eeffdc 100644 --- a/mlflow/data/delta_dataset_source.py +++ b/mlflow/data/delta_dataset_source.py @@ -137,7 +137,7 @@ def _lookup_table_id(self, table_name): except Exception: return None - def _to_dict(self) -> Dict[Any, Any]: + def to_dict(self) -> Dict[Any, Any]: info = {} if self._path: info["path"] = self._path @@ -154,7 +154,7 @@ def _to_dict(self) -> Dict[Any, Any]: return info @classmethod - def _from_dict(cls, source_dict: Dict[Any, Any]) -> "DeltaDatasetSource": + def from_dict(cls, source_dict: Dict[Any, Any]) -> "DeltaDatasetSource": return cls( path=source_dict.get("path"), delta_table_name=source_dict.get("delta_table_name"), diff --git a/mlflow/data/filesystem_dataset_source.py b/mlflow/data/filesystem_dataset_source.py index 70a40e41b5bea..5a917a2ea69d1 100644 --- a/mlflow/data/filesystem_dataset_source.py +++ b/mlflow/data/filesystem_dataset_source.py @@ -66,7 +66,7 @@ def _resolve(cls, raw_source: Any) -> "FileSystemDatasetSource": """ @abstractmethod - def _to_dict(self) -> Dict[Any, Any]: + def to_dict(self) -> Dict[Any, Any]: """ Returns: A JSON-compatible dictionary representation of the FileSystemDatasetSource. @@ -74,7 +74,7 @@ def _to_dict(self) -> Dict[Any, Any]: @classmethod @abstractmethod - def _from_dict(cls, source_dict: Dict[Any, Any]) -> "FileSystemDatasetSource": + def from_dict(cls, source_dict: Dict[Any, Any]) -> "FileSystemDatasetSource": """ Args: source_dict: A dictionary representation of the FileSystemDatasetSource. diff --git a/mlflow/data/http_dataset_source.py b/mlflow/data/http_dataset_source.py index 1f68b6fb4b3d5..3fcc2dd401475 100644 --- a/mlflow/data/http_dataset_source.py +++ b/mlflow/data/http_dataset_source.py @@ -120,7 +120,7 @@ def _resolve(cls, raw_source: Any) -> "HTTPDatasetSource": """ return HTTPDatasetSource(raw_source) - def _to_dict(self) -> Dict[Any, Any]: + def to_dict(self) -> Dict[Any, Any]: """ Returns: A JSON-compatible dictionary representation of the HTTPDatasetSource. @@ -130,7 +130,7 @@ def _to_dict(self) -> Dict[Any, Any]: } @classmethod - def _from_dict(cls, source_dict: Dict[Any, Any]) -> "HTTPDatasetSource": + def from_dict(cls, source_dict: Dict[Any, Any]) -> "HTTPDatasetSource": """ Args: source_dict: A dictionary representation of the HTTPDatasetSource. diff --git a/mlflow/data/huggingface_dataset_source.py b/mlflow/data/huggingface_dataset_source.py index a4c3429547467..5687297ae8c7a 100644 --- a/mlflow/data/huggingface_dataset_source.py +++ b/mlflow/data/huggingface_dataset_source.py @@ -87,7 +87,7 @@ def _can_resolve(raw_source: Any): def _resolve(cls, raw_source: str) -> "HuggingFaceDatasetSource": raise NotImplementedError - def _to_dict(self) -> Dict[Any, Any]: + def to_dict(self) -> Dict[Any, Any]: return { "path": self.path, "config_name": self.config_name, @@ -98,7 +98,7 @@ def _to_dict(self) -> Dict[Any, Any]: } @classmethod - def _from_dict(cls, source_dict: Dict[Any, Any]) -> "HuggingFaceDatasetSource": + def from_dict(cls, source_dict: Dict[Any, Any]) -> "HuggingFaceDatasetSource": return cls( path=source_dict.get("path"), config_name=source_dict.get("config_name"), diff --git a/mlflow/data/spark_dataset_source.py b/mlflow/data/spark_dataset_source.py index eb671367eb984..9e1ad26d64017 100644 --- a/mlflow/data/spark_dataset_source.py +++ b/mlflow/data/spark_dataset_source.py @@ -55,7 +55,7 @@ def _can_resolve(raw_source: Any): def _resolve(cls, raw_source: str) -> "SparkDatasetSource": raise NotImplementedError - def _to_dict(self) -> Dict[Any, Any]: + def to_dict(self) -> Dict[Any, Any]: info = {} if self._path is not None: info["path"] = self._path @@ -66,7 +66,7 @@ def _to_dict(self) -> Dict[Any, Any]: return info @classmethod - def _from_dict(cls, source_dict: Dict[Any, Any]) -> "SparkDatasetSource": + def from_dict(cls, source_dict: Dict[Any, Any]) -> "SparkDatasetSource": return cls( path=source_dict.get("path"), table_name=source_dict.get("table_name"), diff --git a/mlflow/data/uc_volume_dataset_source.py b/mlflow/data/uc_volume_dataset_source.py index 92bd8f629188f..c6f2e3b83ad57 100644 --- a/mlflow/data/uc_volume_dataset_source.py +++ b/mlflow/data/uc_volume_dataset_source.py @@ -65,9 +65,9 @@ def _can_resolve(raw_source: Any): def _resolve(cls, raw_source: str): raise NotImplementedError - def _to_dict(self) -> Dict[Any, Any]: + def to_dict(self) -> Dict[Any, Any]: return {"path": self.path} @classmethod - def _from_dict(cls, source_dict: Dict[Any, Any]) -> "UCVolumeDatasetSource": + def from_dict(cls, source_dict: Dict[Any, Any]) -> "UCVolumeDatasetSource": return cls(**source_dict) diff --git a/tests/data/test_code_dataset_source.py b/tests/data/test_code_dataset_source.py index a750f73a514e5..dd8664ab675f8 100644 --- a/tests/data/test_code_dataset_source.py +++ b/tests/data/test_code_dataset_source.py @@ -7,7 +7,7 @@ def test_code_dataset_source_from_path(): "mlflow_source_name": "some_random_notebook_path", } code_datasource = CodeDatasetSource(tags) - assert code_datasource._to_dict() == { + assert code_datasource.to_dict() == { "tags": tags, } diff --git a/tests/resources/data/dataset_source.py b/tests/resources/data/dataset_source.py index 11c4057e41553..93c15db68beb1 100644 --- a/tests/resources/data/dataset_source.py +++ b/tests/resources/data/dataset_source.py @@ -39,11 +39,11 @@ def _can_resolve(raw_source: Any) -> bool: def _resolve(cls, raw_source: Any) -> DatasetSource: return cls(raw_source) - def _to_dict(self) -> Dict[Any, Any]: + def to_dict(self) -> Dict[Any, Any]: return {"uri": self.uri} @classmethod - def _from_dict(cls, source_dict: Dict[Any, Any]) -> DatasetSource: + def from_dict(cls, source_dict: Dict[Any, Any]) -> DatasetSource: uri = source_dict.get("uri") if uri is None: raise MlflowException(