Skip to content

Commit

Permalink
Merge pull request #461 from skalish/tc-golden-records
Browse files Browse the repository at this point in the history
Add Golden Records project with update and publish functions.
  • Loading branch information
pcattori committed Oct 14, 2020
2 parents 4297599 + 4844167 commit 27e8d7d
Show file tree
Hide file tree
Showing 16 changed files with 221 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
- Added functions for getting all datasets and projects in a Tamr instance via `get_all` functions in `tc.dataset` and `tc.project`
- [#454](https://github.com/Datatamer/tamr-client/pull/454) Added first `tamr_client` tutorial "Get Tamr version"
- [#456](https://github.com/Datatamer/tamr-client/pull/456) Added first example `tamr_client` script `examples/get_tamr_version.py`
- [#461](https://github.com/Datatamer/tamr-client/pull/461) Added functions for golden record workflow operations in `tc.golden_records`

**NEW FEATURES**
- [#383](https://github.com/Datatamer/tamr-client/issues/383) Now able to create an Operation from Job resource id
Expand Down
1 change: 1 addition & 0 deletions docs/beta.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* [Auth](beta/auth)
* [Categorization](beta/categorization)
* [Dataset](beta/dataset)
* [Golden Records](beta/golden_records)
* [Instance](beta/instance)
* [Mastering](beta/mastering)
* [Operation](beta/operation)
Expand Down
4 changes: 4 additions & 0 deletions docs/beta/golden_records.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Golden Records

* [Golden Records](/beta/golden_records/golden_records)
* [Project](/beta/golden_records/project)
5 changes: 5 additions & 0 deletions docs/beta/golden_records/golden_records.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Golden Records
==============

.. autofunction:: tamr_client.golden_records.update
.. autofunction:: tamr_client.golden_records.publish
4 changes: 4 additions & 0 deletions docs/beta/golden_records/project.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Golden Records Project
======================

.. autoclass:: tamr_client.GoldenRecordsProject
2 changes: 2 additions & 0 deletions tamr_client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
AttributeType,
CategorizationProject,
Dataset,
GoldenRecordsProject,
InputTransformation,
Instance,
MasteringProject,
Expand All @@ -42,6 +43,7 @@
from tamr_client import attribute
from tamr_client import categorization
from tamr_client import dataset
from tamr_client import golden_records
from tamr_client import instance
from tamr_client import mastering
from tamr_client import operation
Expand Down
1 change: 1 addition & 0 deletions tamr_client/_types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from tamr_client._types.operation import Operation
from tamr_client._types.project import (
CategorizationProject,
GoldenRecordsProject,
MasteringProject,
Project,
SchemaMappingProject,
Expand Down
21 changes: 20 additions & 1 deletion tamr_client/_types/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,23 @@ class SchemaMappingProject:
description: Optional[str] = None


Project = Union[CategorizationProject, MasteringProject, SchemaMappingProject]
@dataclass(frozen=True)
class GoldenRecordsProject:
    """Immutable handle to a Tamr Golden Records project

    See https://docs.tamr.com/reference/the-project-object

    Args:
        url: URL of the project
        name: Name of the project
        description: Optional description of the project (defaults to None)
    """

    # Only `description` carries a default, so it must remain the last field.
    url: URL
    name: str
    description: Optional[str] = None


# Type alias accepted/returned wherever any kind of project is valid;
# GoldenRecordsProject is a member alongside the other project dataclasses.
Project = Union[
CategorizationProject, MasteringProject, SchemaMappingProject, GoldenRecordsProject
]
11 changes: 11 additions & 0 deletions tamr_client/golden_records/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""
Tamr - Golden Records
See https://docs.tamr.com/docs/overview-golden-records
"""
from tamr_client.golden_records import project
from tamr_client.golden_records._golden_records import (
_publish_async,
_update_async,
publish,
update,
)
44 changes: 44 additions & 0 deletions tamr_client/golden_records/_golden_records.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
Tamr - Golden Records
See https://docs.tamr.com/docs/overview-golden-records
The terminology used here is consistent with Tamr UI terminology.
Asynchronous versions of each function can be found with the suffix `_async` and may be of
interest to power users.
"""
from tamr_client import operation
from tamr_client._types import GoldenRecordsProject, Operation, Session


def update(session: Session, project: GoldenRecordsProject) -> Operation:
    """Update the draft golden records and block until the operation is done

    Kicks off the update with `_update_async` and passes the resulting
    operation to `operation.wait`.

    Args:
        session: Tamr session used for the requests
        project: Tamr Golden Records project

    Returns:
        The operation as returned by `operation.wait`
    """
    return operation.wait(session, _update_async(session, project))


def publish(session: Session, project: GoldenRecordsProject) -> Operation:
    """Publish the golden records and block until the operation is done

    Kicks off publishing with `_publish_async` and passes the resulting
    operation to `operation.wait`.

    Args:
        session: Tamr session used for the requests
        project: Tamr Golden Records project

    Returns:
        The operation as returned by `operation.wait`
    """
    return operation.wait(session, _publish_async(session, project))


def _update_async(session: Session, project: GoldenRecordsProject) -> Operation:
    """Request an update of the draft golden records without waiting for it

    POSTs to the project's `goldenRecords:refresh` endpoint and builds the
    Operation from the response.

    Args:
        session: Tamr session used for the request
        project: Tamr Golden Records project
    """
    endpoint = f"{project.url!s}/goldenRecords:refresh"
    response = session.post(endpoint)
    return operation._from_response(project.url.instance, response)


def _publish_async(session: Session, project: GoldenRecordsProject) -> Operation:
    """Request publication of the golden records without waiting for it

    POSTs to the project's `publishedGoldenRecords:refresh` endpoint with the
    `validate=true` and `version=CURRENT` query parameters, then builds the
    Operation from the response.

    Args:
        session: Tamr session used for the request
        project: Tamr Golden Records project
    """
    endpoint = f"{project.url!s}/publishedGoldenRecords:refresh"
    query = {"validate": "true", "version": "CURRENT"}
    response = session.post(endpoint, params=query)
    return operation._from_response(project.url.instance, response)
17 changes: 17 additions & 0 deletions tamr_client/golden_records/project.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from tamr_client._types import (
GoldenRecordsProject,
JsonDict,
URL,
)


def _from_json(url: URL, data: JsonDict) -> GoldenRecordsProject:
    """Deserialize a golden records project from its JSON representation

    Args:
        url: Project URL
        data: Project JSON data from Tamr server; must contain "name",
            while "description" is optional
    """
    name = data["name"]
    # A missing "description" key becomes None rather than raising.
    description = data.get("description")
    return GoldenRecordsProject(url=url, name=name, description=description)
3 changes: 3 additions & 0 deletions tamr_client/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from tamr_client._types import Instance, JsonDict, Project, Session, URL
from tamr_client.categorization import project as categorization_project
from tamr_client.exception import TamrClientException
from tamr_client.golden_records import project as golden_records_project
from tamr_client.mastering import project as mastering_project
from tamr_client.schema_mapping import project as schema_mapping_project

Expand Down Expand Up @@ -106,6 +107,8 @@ def _from_json(url: URL, data: JsonDict) -> Project:
return categorization_project._from_json(url, data)
elif proj_type == "SCHEMA_MAPPING_RECOMMENDATIONS":
return schema_mapping_project._from_json(url, data)
elif proj_type == "GOLDEN_RECORDS":
return golden_records_project._from_json(url, data)
else:
raise ValueError(f"Unrecognized project type '{proj_type}' in {repr(data)}")

Expand Down
8 changes: 8 additions & 0 deletions tests/tamr_client/fake.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,14 @@ def categorization_project() -> tc.CategorizationProject:
return categorization_project


def golden_records_project() -> tc.GoldenRecordsProject:
    """Build the fake Golden Records project located at `projects/3`"""
    return tc.GoldenRecordsProject(
        url=tc.URL(path="projects/3"),
        name="Project 3",
        description="A Golden Records Project",
    )


def transforms() -> tc.Transformations:
return tc.Transformations(
input_scope=[
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
[
{
"request": {
"method": "POST",
"path": "projects/3/publishedGoldenRecords:refresh?validate=true&version=CURRENT"
},
"response": {
"status": 200,
"json": {
"id": "1",
"type": "SPARK",
"description": "Updating published datasets for GoldenRecords module",
"status": {
"state": "PENDING",
"startTime": "",
"endTime": "",
"message": "Job has not yet been submitted to Spark"
},
"created": {
"username": "admin",
"time": "2020-06-12T18:21:42.288Z",
"version": "operation 1 created version"
},
"lastModified": {
"username": "admin",
"time": "2020-06-12T18:21:42.288Z",
"version": "operation 1 modified version"
},
"relativeId": "operations/1"
}
}
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
[
{
"request": {
"method": "POST",
"path": "projects/3/goldenRecords:refresh"
},
"response": {
"status": 200,
"json": {
"id": "1",
"type": "SPARK",
"description": "Updating Golden Records",
"status": {
"state": "PENDING",
"startTime": "",
"endTime": "",
"message": "Job has not yet been submitted to Spark"
},
"created": {
"username": "admin",
"time": "2020-06-12T18:21:42.288Z",
"version": "operation 1 created version"
},
"lastModified": {
"username": "admin",
"time": "2020-06-12T18:21:42.288Z",
"version": "operation 1 modified version"
},
"relativeId": "operations/1"
}
}
}
]
34 changes: 34 additions & 0 deletions tests/tamr_client/golden_records/test_golden_records.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import tamr_client as tc
from tests.tamr_client import fake


@fake.json
def test_update_async():
    """`_update_async` returns the pending Spark operation from the fixture"""
    expected_status = {
        "state": "PENDING",
        "startTime": "",
        "endTime": "",
        "message": "Job has not yet been submitted to Spark",
    }
    session = fake.session()
    gr_project = fake.golden_records_project()

    op = tc.golden_records._update_async(session, gr_project)

    assert op.type == "SPARK"
    assert op.description == "Updating Golden Records"
    assert op.status == expected_status


@fake.json
def test_publish_async():
    """`_publish_async` returns the pending Spark operation from the fixture"""
    expected_status = {
        "state": "PENDING",
        "startTime": "",
        "endTime": "",
        "message": "Job has not yet been submitted to Spark",
    }
    session = fake.session()
    gr_project = fake.golden_records_project()

    op = tc.golden_records._publish_async(session, gr_project)

    assert op.type == "SPARK"
    assert op.description == "Updating published datasets for GoldenRecords module"
    assert op.status == expected_status

0 comments on commit 27e8d7d

Please sign in to comment.