Skip to content

Commit

Permalink
Merge pull request #445 from skalish/by-name
Browse files Browse the repository at this point in the history
TC: by_name functions for projects and datasets
  • Loading branch information
pcattori committed Sep 1, 2020
2 parents f276f57 + 75662ef commit 0f10ba3
Show file tree
Hide file tree
Showing 33 changed files with 388 additions and 101 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
- [#435](https://github.com/Datatamer/tamr-client/pull/435) Now able to create projects of the following type in Tamr: Categorization, Mastering, Schema Mapping
- [#440](https://github.com/Datatamer/tamr-client/pull/440) Added functions for initiating basic mastering workflow operations in `tc.mastering`
- [#443](https://github.com/Datatamer/tamr-client/pull/443) Added function to materialize datasets.
- [#445](https://github.com/Datatamer/tamr-client/pull/445) Added functions for getting projects and datasets by name via `tc.project.by_name` and `tc.dataset.by_name`
- Renamed functions `from_resource_id` to `by_resource_id` in `tc.attribute`, `tc.dataset`, `tc.operation`, and `tc.project`

**NEW FEATURES**
- [#383](https://github.com/Datatamer/tamr-client/issues/383) Now able to create an Operation from Job resource id
Expand Down
2 changes: 1 addition & 1 deletion docs/beta/attribute/attribute.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Attribute

.. autoclass:: tamr_client.Attribute

.. autofunction:: tamr_client.attribute.from_resource_id
.. autofunction:: tamr_client.attribute.by_resource_id
.. autofunction:: tamr_client.attribute.to_json
.. autofunction:: tamr_client.attribute.create
.. autofunction:: tamr_client.attribute.update
Expand Down
6 changes: 5 additions & 1 deletion docs/beta/dataset/dataset.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ Dataset

.. autoclass:: tamr_client.Dataset

.. autofunction:: tamr_client.dataset.from_resource_id
.. autofunction:: tamr_client.dataset.by_resource_id
.. autofunction:: tamr_client.dataset.by_name
.. autofunction:: tamr_client.dataset.attributes
.. autofunction:: tamr_client.dataset.materialize

Expand All @@ -12,3 +13,6 @@ Exceptions

.. autoclass:: tamr_client.dataset.NotFound
:no-inherited-members:

.. autoclass:: tamr_client.dataset.Ambiguous
:no-inherited-members:
2 changes: 1 addition & 1 deletion docs/beta/operation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ Operation
.. autofunction:: tamr_client.operation.poll
.. autofunction:: tamr_client.operation.wait
.. autofunction:: tamr_client.operation.succeeded
.. autofunction:: tamr_client.operation.from_resource_id
.. autofunction:: tamr_client.operation.by_resource_id
6 changes: 5 additions & 1 deletion docs/beta/project.rst
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
Project
=======

.. autofunction:: tamr_client.project.from_resource_id
.. autofunction:: tamr_client.project.by_resource_id
.. autofunction:: tamr_client.project.by_name

Exceptions
----------

.. autoclass:: tamr_client.project.NotFound
:no-inherited-members:

.. autoclass:: tamr_client.project.Ambiguous
:no-inherited-members:
2 changes: 1 addition & 1 deletion tamr_client/attribute/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from tamr_client.attribute._attribute import (
_from_json,
AlreadyExists,
by_resource_id,
create,
delete,
from_resource_id,
NotFound,
ReservedName,
to_json,
Expand Down
6 changes: 3 additions & 3 deletions tamr_client/attribute/_attribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class ReservedName(TamrClientException):
pass


def from_resource_id(session: Session, dataset: Dataset, id: str) -> Attribute:
def by_resource_id(session: Session, dataset: Dataset, id: str) -> Attribute:
"""Get attribute by resource ID
Fetches attribute from Tamr server
Expand All @@ -65,10 +65,10 @@ def from_resource_id(session: Session, dataset: Dataset, id: str) -> Attribute:
requests.HTTPError: If any other HTTP error is encountered.
"""
url = replace(dataset.url, path=dataset.url.path + f"/attributes/{id}")
return _from_url(session, url)
return _by_url(session, url)


def _from_url(session: Session, url: URL) -> Attribute:
def _by_url(session: Session, url: URL) -> Attribute:
"""Get attribute by URL
Fetches attribute from Tamr server
Expand Down
25 changes: 6 additions & 19 deletions tamr_client/categorization/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def create(
Project created in Tamr
Raises:
AlreadyExists: If a project with these specifications already exists
project.AlreadyExists: If a project with these specifications already exists
requests.HTTPError: If any other HTTP error is encountered
"""
return project._create(
Expand All @@ -60,34 +60,21 @@ def create(
)


def manual_labels(
session: Session, instance: Instance, project: CategorizationProject
) -> Dataset:
def manual_labels(session: Session, project: CategorizationProject) -> Dataset:
"""Get manual labels from a Categorization project.
Args:
instance: Tamr instance containing project
project: Tamr project containing labels
Returns:
Dataset containing manual labels
Raises:
_dataset.NotFound: If no dataset could be found at the specified URL
Ambiguous: If multiple targets match dataset name
dataset.NotFound: If no dataset could be found at the specified URL
dataset.Ambiguous: If multiple targets match dataset name
"""
unified_dataset = unified.from_project(session=session, project=project)
labels_dataset_name = unified_dataset.name + "_manual_categorizations"
datasets_url = URL(instance=instance, path="datasets")
r = session.get(
url=str(datasets_url), params={"filter": f"name=={labels_dataset_name}"}
return _dataset.by_name(
session=session, instance=project.url.instance, name=labels_dataset_name
)
matches = r.json()
if len(matches) == 0:
raise _dataset.NotFound(str(r.url))
if len(matches) > 1:
raise _dataset.Ambiguous(str(r.url))

dataset_path = matches[0]["relativeId"]
dataset_url = URL(instance=instance, path=dataset_path)
return _dataset._from_url(session=session, url=dataset_url)
4 changes: 3 additions & 1 deletion tamr_client/dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from tamr_client.dataset import dataframe, record, unified
from tamr_client.dataset._dataset import (
_materialize_async,
Ambiguous,
attributes,
from_resource_id,
by_name,
by_resource_id,
materialize,
NotFound,
)
37 changes: 34 additions & 3 deletions tamr_client/dataset/_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class Ambiguous(TamrClientException):
pass


def from_resource_id(session: Session, instance: Instance, id: str) -> Dataset:
def by_resource_id(session: Session, instance: Instance, id: str) -> Dataset:
"""Get dataset by resource ID
Fetches dataset from Tamr server
Expand All @@ -48,10 +48,41 @@ def from_resource_id(session: Session, instance: Instance, id: str) -> Dataset:
requests.HTTPError: If any other HTTP error is encountered.
"""
url = URL(instance=instance, path=f"datasets/{id}")
return _from_url(session, url)
return _by_url(session, url)


def _from_url(session: Session, url: URL) -> Dataset:
def by_name(session: Session, instance: Instance, name: str) -> Dataset:
    """Get dataset by name

    Fetches dataset from Tamr server

    Args:
        session: Session used to send the request to the Tamr server
        instance: Tamr instance containing this dataset
        name: Dataset name

    Returns:
        The single dataset whose name matches ``name``

    Raises:
        dataset.NotFound: If no dataset could be found with that name.
        dataset.Ambiguous: If multiple targets match dataset name.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    r = session.get(
        url=str(URL(instance=instance, path="datasets")),
        params={"filter": f"name=={name}"},
    )
    # Surface HTTP failures as requests.HTTPError (as documented above) rather
    # than interpreting an error payload as the list of matching datasets.
    r.raise_for_status()

    # Check that exactly one dataset is returned
    matches = r.json()
    if not matches:
        raise NotFound(str(r.url))
    if len(matches) > 1:
        raise Ambiguous(str(r.url))

    # Make Dataset from response; the match carries its own relative path
    match = matches[0]
    url = URL(instance=instance, path=match["relativeId"])
    return _from_json(url=url, data=match)


def _by_url(session: Session, url: URL) -> Dataset:
"""Get dataset by URL
Fetches dataset from Tamr server
Expand Down
4 changes: 2 additions & 2 deletions tamr_client/dataset/unified.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ def from_project(session: Session, project: Project) -> UnifiedDataset:
requests.HTTPError: If any other HTTP error is encountered.
"""
url = URL(instance=project.url.instance, path=f"{project.url.path}/unifiedDataset")
return _from_url(session, url)
return _by_url(session, url)


def _from_url(session: Session, url: URL) -> UnifiedDataset:
def _by_url(session: Session, url: URL) -> UnifiedDataset:
"""Get dataset by URL
Fetches dataset from Tamr server
Expand Down
8 changes: 3 additions & 5 deletions tamr_client/operation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def poll(session: Session, operation: Operation) -> Operation:
Args:
operation: Operation to be polled.
"""
return _from_url(session, operation.url)
return _by_url(session, operation.url)


def wait(
Expand Down Expand Up @@ -68,9 +68,7 @@ def succeeded(operation: Operation) -> bool:
return operation.status is not None and operation.status["state"] == "SUCCEEDED"


def from_resource_id(
session: Session, instance: Instance, resource_id: str
) -> Operation:
def by_resource_id(session: Session, instance: Instance, resource_id: str) -> Operation:
"""Get operation by ID
Args:
Expand Down Expand Up @@ -121,7 +119,7 @@ def _from_response(instance: Instance, response: requests.Response) -> Operation
return _from_json(_url, resource_json)


def _from_url(session: Session, url: URL) -> Operation:
def _by_url(session: Session, url: URL) -> Operation:
"""Get operation by URL
Fetches operation from Tamr server
Expand Down
44 changes: 40 additions & 4 deletions tamr_client/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,19 @@ class NotFound(TamrClientException):
pass


class Ambiguous(TamrClientException):
    """Raised when a project referenced by name matches multiple possible targets."""


class AlreadyExists(TamrClientException):
    """Raised when a project with the given specifications already exists."""


def from_resource_id(session: Session, instance: Instance, id: str) -> Project:
def by_resource_id(session: Session, instance: Instance, id: str) -> Project:
"""Get project by resource ID.
Fetches project from Tamr server.
Expand All @@ -35,10 +41,40 @@ def from_resource_id(session: Session, instance: Instance, id: str) -> Project:
requests.HTTPError: If any other HTTP error is encountered.
"""
url = URL(instance=instance, path=f"projects/{id}")
return _from_url(session, url)
return _by_url(session, url)


def by_name(session: Session, instance: Instance, name: str) -> Project:
    """Get project by name

    Fetches project from Tamr server.

    Args:
        session: Session used to send the request to the Tamr server
        instance: Tamr instance containing this project
        name: Project name

    Returns:
        The single project whose name matches ``name``

    Raises:
        project.NotFound: If no project could be found with that name.
        project.Ambiguous: If multiple targets match project name.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    r = session.get(
        url=str(URL(instance=instance, path="projects")),
        params={"filter": f"name=={name}"},
    )
    # Surface HTTP failures as requests.HTTPError (as documented above) rather
    # than interpreting an error payload as the list of matching projects.
    r.raise_for_status()

    # Check that exactly one project is returned
    matches = r.json()
    if not matches:
        raise NotFound(str(r.url))
    if len(matches) > 1:
        raise Ambiguous(str(r.url))

    # Make Project from response; the match carries its own relative path
    match = matches[0]
    url = URL(instance=instance, path=match["relativeId"])
    return _from_json(url=url, data=match)


def _from_url(session: Session, url: URL) -> Project:
def _by_url(session: Session, url: URL) -> Project:
"""Get project by URL.
Fetches project from Tamr server.
Expand Down Expand Up @@ -120,4 +156,4 @@ def _create(
project_path = data["relativeId"]
project_url = URL(instance=instance, path=str(project_path))

return _from_url(session=session, url=project_url)
return _by_url(session=session, url=project_url)
9 changes: 4 additions & 5 deletions tamr_client/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ def _input_transformation_from_json(
"""
dataset_resource_ids = [d["datasetId"].split("/")[-1] for d in data["datasets"]]
datasets = [
dataset.from_resource_id(session, instance, d_id)
for d_id in dataset_resource_ids
dataset.by_resource_id(session, instance, d_id) for d_id in dataset_resource_ids
]
return InputTransformation(transformation=data["transformation"], datasets=datasets)

Expand Down Expand Up @@ -83,7 +82,7 @@ def get_all(session: Session, project: Project) -> Transformations:
>>> import tamr_client as tc
>>> session = tc.session.from_auth('username', 'password')
>>> instance = tc.instance.Instance(host="localhost", port=9100)
>>> project1 = tc.project.from_resource_id(session, instance, id='1')
>>> project1 = tc.project.by_resource_id(session, instance, id='1')
>>> print(tc.transformations.get_all(session, project1))
"""
r = session.get(f"{project.url}/transformations")
Expand All @@ -107,8 +106,8 @@ def replace_all(
>>> import tamr_client as tc
>>> session = tc.session.from_auth('username', 'password')
>>> instance = tc.instance.Instance(host="localhost", port=9100)
>>> project1 = tc.project.from_resource_id(session, instance, id='1')
>>> dataset3 = tc.dataset.from_resource_id(session, instance, id='3')
>>> project1 = tc.project.by_resource_id(session, instance, id='1')
>>> dataset3 = tc.dataset.by_resource_id(session, instance, id='3')
>>> new_input_tx = tc.InputTransformation("SELECT *, upper(name) as name;", [dataset3])
>>> all_tx = tc.Transformations(
... input_scope=[new_input_tx],
Expand Down
8 changes: 4 additions & 4 deletions tests/tamr_client/attribute/test_attribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def test_delete():


@fake.json
def test_from_resource_id():
def test_by_resource_id():
s = fake.session()
dataset = fake.dataset()

Expand All @@ -93,7 +93,7 @@ def test_from_resource_id():
]
)

attr = tc.attribute.from_resource_id(s, dataset, "attr")
attr = tc.attribute.by_resource_id(s, dataset, "attr")

assert attr.name == "attr"
assert not attr.is_nullable
Expand All @@ -102,12 +102,12 @@ def test_from_resource_id():


@fake.json
def test_from_resource_id_attribute_not_found():
def test_by_resource_id_attribute_not_found():
s = fake.session()
dataset = fake.dataset()

with pytest.raises(tc.attribute.NotFound):
tc.attribute.from_resource_id(s, dataset, "attr")
tc.attribute.by_resource_id(s, dataset, "attr")


def test_create_reserved_attribute_name():
Expand Down
5 changes: 1 addition & 4 deletions tests/tamr_client/categorization/test_project.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@
@fake.json
def test_manual_labels():
s = fake.session()
instance = fake.instance()
project = fake.categorization_project()

tc.categorization.project.manual_labels(
session=s, instance=instance, project=project
)
tc.categorization.project.manual_labels(session=s, project=project)

0 comments on commit 0f10ba3

Please sign in to comment.