-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #446 from skalish/cat-ops
TC: Add functions to run Categorization and Schema Mapping project workflows
- Loading branch information
Showing
18 changed files
with
240 additions
and
43 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
# Categorization | ||
|
||
* [Categorization](/beta/categorization/categorization) | ||
* [Project](/beta/categorization/project) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
Categorization | ||
============== | ||
|
||
.. autofunction:: tamr_client.categorization.update_unified_dataset | ||
.. autofunction:: tamr_client.categorization.apply_feedback | ||
.. autofunction:: tamr_client.categorization.update_results | ||
.. autofunction:: tamr_client.categorization.manual_labels |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
# Schema Mapping | ||
|
||
* [Schema Mapping](/beta/schema_mapping/schema_mapping) | ||
* [Project](/beta/schema_mapping/project) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
Schema Mapping | ||
============== | ||
|
||
.. autofunction:: tamr_client.schema_mapping.update_unified_dataset |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
""" | ||
Tamr - Categorization | ||
See https://docs.tamr.com/docs/overall-workflow-classification | ||
The terminology used here is consistent with Tamr UI terminology | ||
Asynchronous versions of the long-running functions are available as private helpers with | ||
the suffix `_async` (e.g. `_apply_feedback_async`) and may be of interest to power users | ||
""" | ||
from tamr_client import operation | ||
from tamr_client._types import CategorizationProject, Dataset, Operation, Session | ||
from tamr_client.dataset import _dataset, unified | ||
|
||
|
||
def manual_labels(session: Session, project: CategorizationProject) -> Dataset:
    """Fetch the dataset that holds the manual labels of a Categorization project.

    The labels dataset is looked up by name: the name of the project's unified
    dataset followed by ``_manual_categorizations``.

    Args:
        session: Tamr session used for the lookups
        project: Tamr project containing labels

    Returns:
        Dataset containing manual labels

    Raises:
        dataset.NotFound: If no dataset could be found at the specified URL
        dataset.Ambiguous: If multiple targets match dataset name
    """
    suffix = "_manual_categorizations"
    source = unified.from_project(session, project)
    return _dataset.by_name(
        session=session,
        instance=project.url.instance,
        name=source.name + suffix,
    )
|
||
|
||
def update_unified_dataset(
    session: Session, project: CategorizationProject
) -> Operation:
    """Apply pending changes to the project's unified dataset and block until done.

    Args:
        session: Tamr session used for the requests
        project: Tamr Categorization project

    Returns:
        The operation as returned by `operation.wait`
    """
    pending = unified._apply_changes_async(
        session, unified.from_project(session, project)
    )
    return operation.wait(session, pending)
|
||
|
||
def apply_feedback(session: Session, project: CategorizationProject) -> Operation:
    """Train the categorization model on verified labels and block until done.

    Args:
        session: Tamr session used for the requests
        project: Tamr Categorization project

    Returns:
        The operation as returned by `operation.wait`
    """
    training = _apply_feedback_async(session, project)
    finished = operation.wait(session, training)
    return finished
|
||
|
||
def update_results(session: Session, project: CategorizationProject) -> Operation:
    """Generate classifications from the latest categorization model and block
    until done.

    Args:
        session: Tamr session used for the requests
        project: Tamr Categorization project

    Returns:
        The operation as returned by `operation.wait`
    """
    refresh = _update_results_async(session, project)
    finished = operation.wait(session, refresh)
    return finished
|
||
|
||
def _apply_feedback_async(
    session: Session, project: CategorizationProject
) -> Operation:
    """Request a categorization model refresh without waiting for it to finish.

    POSTs to ``<project url>/categorizations/model:refresh`` and builds the
    operation from the response.

    Args:
        session: Tamr session used for the request
        project: Tamr Categorization project

    Returns:
        The operation parsed from the response
    """
    endpoint = str(project.url) + "/categorizations/model:refresh"
    response = session.post(endpoint)
    return operation._from_response(project.url.instance, response)
|
||
|
||
def _update_results_async(
    session: Session, project: CategorizationProject
) -> Operation:
    """Request a refresh of the categorization results without waiting for it
    to finish.

    POSTs to ``<project url>/categorizations:refresh`` and builds the operation
    from the response.

    Args:
        session: Tamr session used for the request
        project: Tamr Categorization project

    Returns:
        The operation parsed from the response
    """
    endpoint = str(project.url) + "/categorizations:refresh"
    response = session.post(endpoint)
    return operation._from_response(project.url.instance, response)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
""" | ||
Tamr - Schema Mapping | ||
See https://docs.tamr.com/new/docs/overall-workflow-schema | ||
The terminology used here is consistent with Tamr UI terminology | ||
Asynchronous versions of each function can be found with the suffix `_async` and may be of | ||
interest to power users | ||
""" | ||
from tamr_client import operation | ||
from tamr_client._types import Operation, SchemaMappingProject, Session | ||
from tamr_client.dataset import unified | ||
|
||
|
||
def update_unified_dataset(
    session: Session, project: SchemaMappingProject
) -> Operation:
    """Apply pending changes to the project's unified dataset and block until done.

    Args:
        session: Tamr session used for the requests
        project: Tamr Schema Mapping project

    Returns:
        The operation as returned by `operation.wait`
    """
    pending = unified._apply_changes_async(
        session, unified.from_project(session, project)
    )
    return operation.wait(session, pending)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import tamr_client as tc | ||
from tests.tamr_client import fake | ||
|
||
|
||
@fake.json
def test_manual_labels():
    """Smoke test: `manual_labels` completes for a fake categorization project."""
    session = fake.session()
    categorization_project = fake.categorization_project()

    tc.categorization.manual_labels(
        session=session, project=categorization_project
    )
|
||
|
||
@fake.json
def test_apply_feedback_async():
    """The operation returned by `_apply_feedback_async` has the expected type,
    description and status."""
    session = fake.session()
    categorization_project = fake.categorization_project()
    expected_status = {
        "state": "PENDING",
        "startTime": "",
        "endTime": "",
        "message": "Job has not yet been submitted to Spark",
    }

    result = tc.categorization._apply_feedback_async(session, categorization_project)

    assert result.type == "SPARK"
    assert result.description == "Materialize views to Elastic"
    assert result.status == expected_status
|
||
|
||
@fake.json
def test_update_results_async():
    """The operation returned by `_update_results_async` has the expected type,
    description and status."""
    session = fake.session()
    categorization_project = fake.categorization_project()
    expected_status = {
        "state": "PENDING",
        "startTime": "",
        "endTime": "",
        "message": "Job has not yet been submitted to Spark",
    }

    result = tc.categorization._update_results_async(session, categorization_project)

    assert result.type == "SPARK"
    assert result.description == "Materialize views to Elastic"
    assert result.status == expected_status
This file was deleted.
Oops, something went wrong.
33 changes: 33 additions & 0 deletions
33
...s/tamr_client/fake_json/categorization/test_categorization/test_apply_feedback_async.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
[ | ||
{ | ||
"request": { | ||
"method": "POST", | ||
"path": "projects/2/categorizations/model:refresh" | ||
}, | ||
"response": { | ||
"status": 200, | ||
"json": { | ||
"id": "1", | ||
"type": "SPARK", | ||
"description": "Materialize views to Elastic", | ||
"status": { | ||
"state": "PENDING", | ||
"startTime": "", | ||
"endTime": "", | ||
"message": "Job has not yet been submitted to Spark" | ||
}, | ||
"created": { | ||
"username": "admin", | ||
"time": "2020-06-12T18:21:42.288Z", | ||
"version": "operation 1 created version" | ||
}, | ||
"lastModified": { | ||
"username": "admin", | ||
"time": "2020-06-12T18:21:42.288Z", | ||
"version": "operation 1 modified version" | ||
}, | ||
"relativeId": "operations/1" | ||
} | ||
} | ||
} | ||
] |
File renamed without changes.
33 changes: 33 additions & 0 deletions
33
...s/tamr_client/fake_json/categorization/test_categorization/test_update_results_async.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
[ | ||
{ | ||
"request": { | ||
"method": "POST", | ||
"path": "projects/2/categorizations:refresh" | ||
}, | ||
"response": { | ||
"status": 200, | ||
"json": { | ||
"id": "1", | ||
"type": "SPARK", | ||
"description": "Materialize views to Elastic", | ||
"status": { | ||
"state": "PENDING", | ||
"startTime": "", | ||
"endTime": "", | ||
"message": "Job has not yet been submitted to Spark" | ||
}, | ||
"created": { | ||
"username": "admin", | ||
"time": "2020-06-12T18:21:42.288Z", | ||
"version": "operation 1 created version" | ||
}, | ||
"lastModified": { | ||
"username": "admin", | ||
"time": "2020-06-12T18:21:42.288Z", | ||
"version": "operation 1 modified version" | ||
}, | ||
"relativeId": "operations/1" | ||
} | ||
} | ||
} | ||
] |