Skip to content

Commit

Permalink
Merge pull request #452 from skalish/dataset-crud
Browse files Browse the repository at this point in the history
Dataset CRUD + Project List Functions
  • Loading branch information
pcattori committed Sep 3, 2020
2 parents d72d7eb + 0625243 commit 63f0433
Show file tree
Hide file tree
Showing 28 changed files with 734 additions and 11 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
- [#445](https://github.com/Datatamer/tamr-client/pull/445) Added functions for getting projects and datasets by name via `tc.project.by_name` and `tc.dataset.by_name`
- Renamed functions `from_resource_id` to `by_resource_id` in `tc.attribute`, `tc.dataset`, `tc.operation`, and `tc.project`
- [#446](https://github.com/Datatamer/tamr-client/pull/446) Added functions for categorization workflow operations in `tc.categorization` and schema mapping workflow operations in `tc.schema_mapping`
- [#452](https://github.com/Datatamer/tamr-client/pull/452) Added functions for creating and deleting a dataset via `tc.dataset.create` and `tc.dataset.delete`
- Added function for deleting all records in a dataset via `tc.record.delete_all`
- Added functions for getting all datasets and projects in a Tamr instance via `get_all` functions in `tc.dataset` and `tc.project`

**NEW FEATURES**
- [#383](https://github.com/Datatamer/tamr-client/issues/383) Now able to create an Operation from Job resource id
Expand Down
6 changes: 6 additions & 0 deletions docs/beta/dataset/dataset.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ Dataset
.. autofunction:: tamr_client.dataset.by_name
.. autofunction:: tamr_client.dataset.attributes
.. autofunction:: tamr_client.dataset.materialize
.. autofunction:: tamr_client.dataset.delete
.. autofunction:: tamr_client.dataset.get_all
.. autofunction:: tamr_client.dataset.create

Exceptions
----------
Expand All @@ -16,3 +19,6 @@ Exceptions

.. autoclass:: tamr_client.dataset.Ambiguous
:no-inherited-members:

.. autoclass:: tamr_client.dataset.AlreadyExists
:no-inherited-members:
3 changes: 2 additions & 1 deletion docs/beta/dataset/record.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ Record
.. autofunction:: tamr_client.record.upsert
.. autofunction:: tamr_client.record.delete
.. autofunction:: tamr_client.record._update
.. autofunction:: tamr_client.record.stream
.. autofunction:: tamr_client.record.stream
.. autofunction:: tamr_client.record.delete_all
1 change: 1 addition & 0 deletions docs/beta/project.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Project

.. autofunction:: tamr_client.project.by_resource_id
.. autofunction:: tamr_client.project.by_name
.. autofunction:: tamr_client.project.get_all

Exceptions
----------
Expand Down
4 changes: 2 additions & 2 deletions tamr_client/attribute/_attribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,8 @@ def create(
The newly created attribute
Raises:
ReservedName: If attribute name is reserved.
AlreadyExists: If an attribute already exists at the specified URL.
attribute.ReservedName: If attribute name is reserved.
attribute.AlreadyExists: If an attribute already exists at the specified URL.
Corresponds to a 409 HTTP error.
requests.HTTPError: If any other HTTP error is encountered.
"""
Expand Down
2 changes: 1 addition & 1 deletion tamr_client/categorization/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def create(
Project created in Tamr
Raises:
attribute.AlreadyExists: If a project with these specifications already exists
project.AlreadyExists: If a project with these specifications already exists
requests.HTTPError: If any other HTTP error is encountered
"""
return project._create(
Expand Down
4 changes: 4 additions & 0 deletions tamr_client/dataset/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
from tamr_client.dataset import dataframe, record, unified
from tamr_client.dataset._dataset import (
_materialize_async,
AlreadyExists,
Ambiguous,
attributes,
by_name,
by_resource_id,
create,
delete,
get_all,
materialize,
NotFound,
)
109 changes: 108 additions & 1 deletion tamr_client/dataset/_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
from copy import deepcopy
from dataclasses import replace
from typing import Tuple
from typing import List, Optional, Tuple, Union

from tamr_client import operation, response
from tamr_client._types import (
Expand Down Expand Up @@ -33,6 +33,12 @@ class Ambiguous(TamrClientException):
pass


class AlreadyExists(TamrClientException):
    """Signals that the dataset being created already exists.

    Raised when a dataset with these specifications already exists.
    """


def by_resource_id(session: Session, instance: Instance, id: str) -> Dataset:
"""Get dataset by resource ID
Expand Down Expand Up @@ -157,3 +163,104 @@ def materialize(session: Session, dataset: Dataset) -> Operation:
def _materialize_async(session: Session, dataset: Dataset) -> Operation:
    """Post a refresh request for the dataset and wrap the reply as an operation.

    Args:
        session: Session used to send the POST request
        dataset: Dataset whose URL, suffixed with ``:refresh``, is posted to

    Returns:
        Operation built from the response by ``operation._from_response``
    """
    refresh_endpoint = str(dataset.url) + ":refresh"
    resp = session.post(refresh_endpoint)
    return operation._from_response(dataset.url.instance, resp)


def delete(session: Session, dataset: Dataset, *, cascade: bool = False):
    """Delete an existing dataset.

    Issues an HTTP DELETE request against the dataset's URL.

    Args:
        session: Session used to send the request
        dataset: Existing dataset to delete
        cascade: Whether to delete all derived datasets as well

    Raises:
        dataset.NotFound: If no dataset could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    target = str(dataset.url)
    resp = session.delete(target, params={"cascade": cascade})

    # A 404 gets its own dataset-specific exception; every other status is
    # delegated to the shared response check.
    if resp.status_code != 404:
        response.successful(resp)
        return
    raise NotFound(target)


def get_all(
    session: Session,
    instance: Instance,
    *,
    filter: Optional[Union[str, List[str]]] = None,
) -> Tuple[Dataset, ...]:
    """Fetch every dataset available on a Tamr instance.

    Args:
        session: Session used to send the GET request
        instance: Tamr instance from which to get datasets
        filter: Filter expression, e.g. "externalId==wobbly"
            Multiple expressions can be passed as a list

    Returns:
        The datasets retrieved from the instance

    Raises:
        requests.HTTPError: If an HTTP error is encountered.
    """
    listing_url = str(URL(instance=instance, path="datasets"))

    # Query parameters are attached only when a filter was actually supplied.
    request_kwargs = {} if filter is None else {"params": {"filter": filter}}
    resp = session.get(listing_url, **request_kwargs)

    payload = response.successful(resp).json()

    # Each entry carries its own relative path, which locates it on the instance.
    return tuple(
        _from_json(URL(instance=instance, path=entry["relativeId"]), entry)
        for entry in payload
    )


def create(
    session: Session,
    instance: Instance,
    *,
    name: str,
    key_attribute_names: Tuple[str, ...],
    description: Optional[str] = None,
    external_id: Optional[str] = None,
) -> Dataset:
    """Create a dataset in Tamr.

    Posts the dataset specification to the instance's ``datasets`` endpoint,
    then fetches the newly created dataset from the ``relativeId`` reported
    in the response.

    Args:
        session: Session used to send the requests
        instance: Tamr instance
        name: Dataset name
        key_attribute_names: Dataset primary key attribute names
        description: Dataset description
        external_id: External ID of the dataset

    Returns:
        Dataset created in Tamr

    Raises:
        dataset.AlreadyExists: If a dataset with these specifications already exists.
            Detected as a 400 HTTP error whose message contains "already exists".
        requests.HTTPError: If any other HTTP error is encountered.
    """
    body = {
        "name": name,
        "keyAttributeNames": key_attribute_names,
        "description": description,
        "externalId": external_id,
    }

    datasets_url = URL(instance=instance, path="datasets")
    r = session.post(url=str(datasets_url), json=body)

    if r.status_code == 400:
        # The error body is parsed once and defensively: a 400 whose body is
        # not JSON, lacks "message", or carries an unexpected message type is
        # not a duplicate-dataset error and must fall through to the generic
        # HTTP error handling below instead of raising a parsing exception.
        try:
            message = r.json()["message"]
            duplicate = "already exists" in message
        except (ValueError, KeyError, TypeError):
            duplicate = False
        if duplicate:
            raise AlreadyExists(message)

    created = response.successful(r).json()
    dataset_url = URL(instance=instance, path=str(created["relativeId"]))

    return _by_url(session=session, url=dataset_url)
10 changes: 10 additions & 0 deletions tamr_client/dataset/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,13 @@ def stream(session: Session, dataset: AnyDataset) -> Iterator[JsonDict]:
"""
with session.get(str(dataset.url) + "/records", stream=True) as r:
return response.ndjson(r)


def delete_all(session: Session, dataset: AnyDataset):
    """Delete all records in this dataset.

    Sends an HTTP DELETE request to the dataset's ``/records`` endpoint and
    passes the response through ``response.successful``.

    Args:
        session: Session used to send the request
        dataset: Dataset from which to delete records
    """
    records_endpoint = str(dataset.url) + "/records"
    response.successful(session.delete(records_endpoint))
2 changes: 1 addition & 1 deletion tamr_client/mastering/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def create(
Project created in Tamr
Raises:
AlreadyExists: If a project with these specifications already exists.
project.AlreadyExists: If a project with these specifications already exists.
requests.HTTPError: If any other HTTP error is encountered.
"""
return project._create(
Expand Down
2 changes: 1 addition & 1 deletion tamr_client/operation.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def _by_url(session: Session, url: URL) -> Operation:
url: Operation URL
Raises:
OperationNotFound: If no operation could be found at the specified URL.
operation.NotFound: If no operation could be found at the specified URL.
Corresponds to a 404 HTTP error.
requests.HTTPError: If any other HTTP error is encountered.
"""
Expand Down
42 changes: 39 additions & 3 deletions tamr_client/project.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional
from typing import List, Optional, Tuple, Union

from tamr_client import response
from tamr_client._types import Instance, JsonDict, Project, Session, URL
Expand Down Expand Up @@ -82,7 +82,7 @@ def _by_url(session: Session, url: URL) -> Project:
url: Project URL
Raises:
NotFound: If no project could be found at the specified URL.
project.NotFound: If no project could be found at the specified URL.
Corresponds to a 404 HTTP error.
requests.HTTPError: If any other HTTP error is encountered.
"""
Expand Down Expand Up @@ -133,7 +133,7 @@ def _create(
Project created in Tamr
Raises:
AlreadyExists: If a project with these specifications already exists.
project.AlreadyExists: If a project with these specifications already exists.
requests.HTTPError: If any other HTTP error is encountered.
"""
if not unified_dataset_name:
Expand All @@ -157,3 +157,39 @@ def _create(
project_url = URL(instance=instance, path=str(project_path))

return _by_url(session=session, url=project_url)


def get_all(
    session: Session,
    instance: Instance,
    *,
    filter: Optional[Union[str, List[str]]] = None,
) -> Tuple[Project, ...]:
    """Fetch every project available on a Tamr instance.

    Args:
        session: Session used to send the GET request
        instance: Tamr instance from which to get projects
        filter: Filter expression, e.g. "externalId==wobbly"
            Multiple expressions can be passed as a list

    Returns:
        The projects retrieved from the instance

    Raises:
        requests.HTTPError: If an HTTP error is encountered.
    """
    listing_url = str(URL(instance=instance, path="projects"))

    # Query parameters are attached only when a filter was actually supplied.
    request_kwargs = {} if filter is None else {"params": {"filter": filter}}
    resp = session.get(listing_url, **request_kwargs)

    payload = response.successful(resp).json()

    def _to_project(entry):
        # Each entry carries its own relative path, which locates it on the instance.
        entry_url = URL(instance=instance, path=entry["relativeId"])
        return _from_json(entry_url, entry)

    return tuple(_to_project(entry) for entry in payload)
2 changes: 1 addition & 1 deletion tamr_client/schema_mapping/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def create(
Project created in Tamr
Raises:
AlreadyExists: If a project with these specifications already exists.
project.AlreadyExists: If a project with these specifications already exists.
requests.HTTPError: If any other HTTP error is encountered.
"""
return project._create(
Expand Down

0 comments on commit 63f0433

Please sign in to comment.