-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #389 from ianbakst/master
Unified Dataset dataclass
- Loading branch information
Showing
15 changed files
with
242 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
Unified | ||
======= | ||
|
||
.. autoclass:: tamr_client.dataset.unified.UnifiedDataset | ||
|
||
.. autofunction:: tamr_client.dataset.unified.from_project | ||
.. autofunction:: tamr_client.dataset.unified.commit | ||
|
||
Exceptions | ||
---------- | ||
|
||
.. autoclass:: tamr_client.dataset.unified.NotFound | ||
:no-inherited-members: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# flake8: noqa | ||
from tamr_client.dataset.dataset import Dataset | ||
from tamr_client.dataset.dataset import AnyDataset, Dataset | ||
from tamr_client.dataset.dataset import from_resource_id | ||
from tamr_client.dataset.dataset import DatasetNotFound | ||
from tamr_client.dataset import dataframe, record | ||
from tamr_client.dataset.dataset import NotFound | ||
from tamr_client.dataset import dataframe, record, unified |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
""" | ||
See https://docs.tamr.com/reference/dataset-models | ||
""" | ||
from copy import deepcopy | ||
from dataclasses import dataclass | ||
from typing import Optional, Tuple | ||
|
||
from tamr_client import response | ||
from tamr_client.instance import Instance | ||
from tamr_client.project import Project | ||
from tamr_client.session import Session | ||
from tamr_client.types import JsonDict | ||
from tamr_client.url import URL | ||
|
||
|
||
class NotFound(Exception):
    """Raised when referencing (e.g. updating or deleting) a unified dataset
    that does not exist on the server.
    """
|
||
|
||
@dataclass(frozen=True)
class UnifiedDataset:
    """A Tamr unified dataset

    See https://docs.tamr.com/reference/dataset-models

    Instances are immutable (the dataclass is frozen).

    Args:
        url: URL of the unified dataset
        name: Name of the unified dataset
        key_attribute_names: Names of the dataset's key attributes
        description: Optional description of the unified dataset
    """

    url: URL
    name: str
    key_attribute_names: Tuple[str, ...]
    description: Optional[str] = None
|
||
|
||
def from_project(
    session: Session, instance: Instance, project: Project
) -> UnifiedDataset:
    """Get unified dataset of a project

    Fetches the unified dataset of a given project from Tamr server

    Args:
        session: Tamr session used to issue the request
        instance: Tamr instance containing this dataset
        project: Tamr project of this Unified Dataset

    Returns:
        The unified dataset found under the project's ``unifiedDataset`` path.

    Raises:
        unified.NotFound: If no unified dataset could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    # The unified dataset lives directly beneath the project's own path.
    return _from_url(
        session,
        URL(instance=instance, path=f"{project.url.path}/unifiedDataset"),
    )
|
||
|
||
def _from_url(session: Session, url: URL) -> UnifiedDataset:
    """Get dataset by URL

    Fetches dataset from Tamr server

    Args:
        session: Tamr session used to issue the GET request
        url: Dataset URL

    Returns:
        The unified dataset built from the JSON body of the response.

    Raises:
        unified.NotFound: If no dataset could be found at the specified URL.
            Corresponds to a 404 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    address = str(url)
    reply = session.get(address)
    # A 404 is reported with the dedicated exception before any generic
    # success check runs on the response.
    if reply.status_code == 404:
        raise NotFound(address)
    return _from_json(url, response.successful(reply).json())
|
||
|
||
def _from_json(url: URL, data: JsonDict) -> UnifiedDataset:
    """Make unified dataset from JSON data (deserialize)

    Args:
        url: Unified Dataset URL
        data: Unified Dataset JSON data from Tamr server

    Returns:
        A unified dataset populated from the ``name``, ``description`` and
        ``keyAttributeNames`` entries of ``data``.
    """
    # Work on a private copy so the caller's JSON is never shared.
    payload = deepcopy(data)
    dataset_name = payload["name"]
    # "description" may be absent, in which case it is None.
    dataset_description = payload.get("description")
    key_names = tuple(payload["keyAttributeNames"])
    return UnifiedDataset(
        url=url,
        name=dataset_name,
        key_attribute_names=key_names,
        description=dataset_description,
    )
|
||
|
||
def commit(session: Session, unified_dataset: UnifiedDataset) -> JsonDict:
    """Commits the Unified Dataset.

    Sends a POST request to the unified dataset's URL suffixed with ``:refresh``.

    Args:
        session: The Tamr Session
        unified_dataset: The UnifiedDataset which will be committed

    Returns:
        The JSON-decoded body of the server's response.
    """
    refresh_endpoint = str(unified_dataset.url) + ":refresh"
    json_headers = {"Content-Type": "application/json", "Accept": "application/json"}
    reply = session.post(refresh_endpoint, headers=json_headers)
    # NOTE(review): response.successful presumably raises on an HTTP error
    # status -- confirm against tamr_client.response.
    return response.successful(reply).json()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
{ | ||
"id": "1", | ||
"type": "SPARK", | ||
"description": "operation 1 description", | ||
"status": { | ||
"state": "PENDING", | ||
"startTime": "", | ||
"endTime": "", | ||
"message": "Job has not yet been submitted to Spark" | ||
}, | ||
"created": { | ||
"username": "admin", | ||
"time": "2020-06-12T18:21:42.288Z", | ||
"version": "operation 1 created version" | ||
}, | ||
"lastModified": { | ||
"username": "admin", | ||
"time": "2020-06-12T18:21:42.288Z", | ||
"version": "operation 1 modified version" | ||
}, | ||
"relativeId": "operations/1" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import pytest | ||
import responses | ||
|
||
import tamr_client as tc | ||
from tests.tamr_client import utils | ||
|
||
|
||
@responses.activate
def test_from_project():
    """from_project returns the unified dataset described by the mocked GET."""
    session = utils.session()
    instance = utils.instance()
    project = utils.mastering_project()

    # Serve the dataset fixture from the project's unifiedDataset endpoint.
    payload = utils.load_json("dataset.json")
    endpoint = str(tc.URL(path="projects/1/unifiedDataset"))
    responses.add(responses.GET, endpoint, json=payload)

    fetched = tc.dataset.unified.from_project(session, instance, project)

    assert fetched.name == "dataset 1 name"
    assert fetched.description == "dataset 1 description"
    assert fetched.key_attribute_names == ("tamr_id",)
|
||
|
||
@responses.activate
def test_from_project_dataset_not_found():
    """from_project raises unified.NotFound when the endpoint answers 404."""
    session = utils.session()
    instance = utils.instance()
    project = utils.mastering_project()

    # The mocked endpoint reports that the unified dataset does not exist.
    endpoint = str(tc.URL(path="projects/1/unifiedDataset"))
    responses.add(responses.GET, endpoint, status=404)

    with pytest.raises(tc.dataset.unified.NotFound):
        tc.dataset.unified.from_project(session, instance, project)
|
||
|
||
@responses.activate
def test_commit():
    """commit POSTs to the ``:refresh`` endpoint and returns its JSON body."""
    session = utils.session()
    instance = utils.instance()
    project = utils.mastering_project()

    operation_payload = utils.load_json("operation.json")
    dataset_payload = utils.load_json("dataset.json")

    # First obtain a unified dataset through the mocked GET endpoint.
    dataset_endpoint = str(tc.URL(path="projects/1/unifiedDataset"))
    responses.add(responses.GET, dataset_endpoint, json=dataset_payload)
    unified_dataset = tc.dataset.unified.from_project(session, instance, project)

    # Then mock the refresh endpoint that commit is expected to call.
    refresh_endpoint = str(tc.URL(path="projects/1/unifiedDataset:refresh"))
    responses.add(responses.POST, refresh_endpoint, json=operation_payload)

    result = tc.dataset.unified.commit(session, unified_dataset)
    assert result == operation_payload
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters