Skip to content

Commit

Permalink
feat: Deprecate datasets module, rename to inferences (#2785)
Browse files: browse the repository at this point in the history
* Initial refactor of Datasets -> Inferences

* Add deprecation warnings to "datasets" public interface

* Fix type signature

* Ensure ExampleDatasets still exists

* Use multiline string

* Rename test files

* Remove test for `Inference.from_open_inference`

* Exclude manual instrumentation example from type checks

* Revert changes to notebooks

* Rename `Inference` -> `Inferences`

* Fix line length

---------

Co-authored-by: Alexander Song <axiomofjoy@gmail.com>
  • Loading branch information
anticorrelator and axiomofjoy committed Apr 8, 2024
1 parent 142944d commit 4987ea3
Show file tree
Hide file tree
Showing 22 changed files with 1,418 additions and 1,395 deletions.
10 changes: 7 additions & 3 deletions src/phoenix/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from typing import Any, Optional

from .datasets.dataset import Dataset
from .datasets.fixtures import ExampleDatasets, load_example
from .datasets.schema import EmbeddingColumnNames, RetrievalEmbeddingColumnNames, Schema
from .datasets.fixtures import ExampleDatasets
from .inferences.fixtures import ExampleInferences, load_example
from .inferences.inferences import Inferences
from .inferences.schema import EmbeddingColumnNames, RetrievalEmbeddingColumnNames, Schema
from .session.client import Client
from .session.evaluation import log_evaluations
from .session.session import NotebookEnvironment, Session, active_session, close_app, launch_app
Expand All @@ -33,11 +35,13 @@
__all__ = [
"__version__",
"Dataset",
"ExampleDatasets",
"Inferences",
"EmbeddingColumnNames",
"RetrievalEmbeddingColumnNames",
"Schema",
"load_example",
"ExampleDatasets",
"ExampleInferences",
"active_session",
"close_app",
"launch_app",
Expand Down
14 changes: 8 additions & 6 deletions src/phoenix/core/model.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from typing import List, Optional, Union

from phoenix.datasets.dataset import Dataset
from phoenix.datasets.schema import EmbeddingColumnNames, EmbeddingFeatures
from phoenix.inferences.inferences import Inferences
from phoenix.inferences.schema import EmbeddingColumnNames, EmbeddingFeatures

from .embedding_dimension import EmbeddingDimension


def _get_embedding_dimensions(
primary_dataset: Dataset, reference_dataset: Optional[Dataset]
primary_dataset: Inferences, reference_dataset: Optional[Inferences]
) -> List[EmbeddingDimension]:
embedding_dimensions: List[EmbeddingDimension] = []
embedding_features: EmbeddingFeatures = {}
Expand Down Expand Up @@ -58,8 +58,8 @@ def _get_embedding_dimensions(
def _check_embedding_vector_lengths_match_across_datasets(
embedding_feature_name: str,
embedding_column_names: EmbeddingColumnNames,
primary_dataset: Dataset,
reference_dataset: Dataset,
primary_dataset: Inferences,
reference_dataset: Inferences,
) -> None:
"""
Ensure that for each embedding feature, the vector lengths match across the primary
Expand All @@ -86,7 +86,9 @@ def _check_embedding_vector_lengths_match_across_datasets(
)


def _get_column_vector_length(dataset: Dataset, embedding_vector_column_name: str) -> Optional[int]:
def _get_column_vector_length(
dataset: Inferences, embedding_vector_column_name: str
) -> Optional[int]:
"""
Because a dataset has already been constructed, we can assume that the lengths
of the vectors for any given embedding feature in the dataset are the same.
Expand Down
12 changes: 6 additions & 6 deletions src/phoenix/core/model_schema_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@
from pandas.api.types import is_object_dtype
from typing_extensions import TypeAlias, TypeGuard

from phoenix import Dataset, EmbeddingColumnNames
from phoenix import EmbeddingColumnNames, Inferences
from phoenix.core.model import _get_embedding_dimensions
from phoenix.core.model_schema import Embedding, Model, RetrievalEmbedding, Schema
from phoenix.datasets.schema import RetrievalEmbeddingColumnNames
from phoenix.datasets.schema import Schema as DatasetSchema
from phoenix.inferences.schema import RetrievalEmbeddingColumnNames
from phoenix.inferences.schema import Schema as DatasetSchema

DatasetName: TypeAlias = str
ColumnName: TypeAlias = str
DisplayName: TypeAlias = str


def create_model_from_datasets(*datasets: Optional[Dataset]) -> Model:
def create_model_from_datasets(*datasets: Optional[Inferences]) -> Model:
# TODO: move this validation into model_schema.Model.
if len(datasets) > 1 and datasets[0] is not None:
# Check that for each embedding dimension all vectors
Expand Down Expand Up @@ -132,8 +132,8 @@ def create_model_from_datasets(*datasets: Optional[Dataset]) -> Model:
)


def _is_dataset(obj: Optional[Dataset]) -> TypeGuard[Dataset]:
return type(obj) is Dataset
def _is_dataset(obj: Optional[Inferences]) -> TypeGuard[Inferences]:
return type(obj) is Inferences


def _take_first_str(iterator: Iterable[str]) -> str:
Expand Down

0 comments on commit 4987ea3

Please sign in to comment.