-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #440 from skalish/mastering-ops
TC: Add functions for basic mastering workflow operations
- Loading branch information
Showing
4 changed files
with
111 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
# Mastering | ||
|
||
* [Mastering](/beta/mastering/mastering) | ||
* [Project](/beta/mastering/project) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
Mastering | ||
========= | ||
|
||
.. autofunction:: tamr_client.mastering.update_unified_dataset | ||
.. autofunction:: tamr_client.mastering.estimate_pairs | ||
.. autofunction:: tamr_client.mastering.generate_pairs | ||
.. autofunction:: tamr_client.mastering.apply_feedback | ||
.. autofunction:: tamr_client.mastering.update_pair_results | ||
.. autofunction:: tamr_client.mastering.update_high_impact_pairs | ||
.. autofunction:: tamr_client.mastering.update_cluster_results | ||
.. autofunction:: tamr_client.mastering.publish_clusters |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
""" | ||
Tamr - Mastering | ||
See https://docs.tamr.com/docs/overall-workflow-mastering | ||
The terminology used here is consistent with Tamr UI terminology | ||
""" | ||
from tamr_client import operation | ||
from tamr_client._types import MasteringProject, Operation, Session | ||
from tamr_client.dataset import unified | ||
|
||
|
||
def update_unified_dataset(session: Session, project: MasteringProject) -> Operation:
    """Applies changes to the unified dataset of a Mastering project

    Looks up the project's unified dataset and hands it to
    ``unified.apply_changes``.

    NOTE(review): whether this call blocks until the operation finishes is
    decided by ``unified.apply_changes`` - confirm there.

    Args:
        session: Session passed through to the unified dataset helpers
        project: Tamr Mastering project

    Returns:
        The Operation returned by ``unified.apply_changes``
    """
    instance = project.url.instance
    dataset = unified.from_project(session, instance, project)
    return unified.apply_changes(session, dataset)
|
||
|
||
def estimate_pairs(session: Session, project: MasteringProject) -> Operation:
    """Updates the estimated pair counts

    Sends a POST to the project's ``estimatedPairCounts:refresh`` endpoint and
    wraps the response in an Operation.

    Args:
        session: Session used to send the request
        project: Tamr Mastering project

    Returns:
        Operation built from the POST response by ``operation._from_response``
    """
    # Join with exactly one "/" so the endpoint is a child of the project URL
    # whether or not ``str(project.url)`` ends with a slash.
    endpoint = str(project.url).rstrip("/") + "/estimatedPairCounts:refresh"
    r = session.post(endpoint)
    return operation._from_response(project.url.instance, r)
|
||
|
||
def generate_pairs(session: Session, project: MasteringProject) -> Operation:
    """Generates pairs according to the binning model

    Sends a POST to the project's ``recordPairs:refresh`` endpoint and wraps
    the response in an Operation.

    Args:
        session: Session used to send the request
        project: Tamr Mastering project

    Returns:
        Operation built from the POST response by ``operation._from_response``
    """
    # Join with exactly one "/" so the endpoint is a child of the project URL
    # whether or not ``str(project.url)`` ends with a slash.
    endpoint = str(project.url).rstrip("/") + "/recordPairs:refresh"
    r = session.post(endpoint)
    return operation._from_response(project.url.instance, r)
|
||
|
||
def apply_feedback(session: Session, project: MasteringProject) -> Operation:
    """Trains the pair-matching model according to verified labels

    Sends a POST to the project's ``recordPairsWithPredictions/model:refresh``
    endpoint and wraps the response in an Operation.

    Args:
        session: Session used to send the request
        project: Tamr Mastering project

    Returns:
        Operation built from the POST response by ``operation._from_response``
    """
    # Join with exactly one "/" so the endpoint is a child of the project URL
    # whether or not ``str(project.url)`` ends with a slash.
    endpoint = (
        str(project.url).rstrip("/") + "/recordPairsWithPredictions/model:refresh"
    )
    r = session.post(endpoint)
    return operation._from_response(project.url.instance, r)
|
||
|
||
def update_pair_results(session: Session, project: MasteringProject) -> Operation:
    """Updates record pair predictions according to the latest pair-matching model

    Sends a POST to the project's ``recordPairsWithPredictions:refresh``
    endpoint and wraps the response in an Operation.

    Args:
        session: Session used to send the request
        project: Tamr Mastering project

    Returns:
        Operation built from the POST response by ``operation._from_response``
    """
    # Join with exactly one "/" so the endpoint is a child of the project URL
    # whether or not ``str(project.url)`` ends with a slash.
    endpoint = str(project.url).rstrip("/") + "/recordPairsWithPredictions:refresh"
    r = session.post(endpoint)
    return operation._from_response(project.url.instance, r)
|
||
|
||
def update_high_impact_pairs(session: Session, project: MasteringProject) -> Operation:
    """Produces new high-impact pairs according to the latest pair-matching model

    Sends a POST to the project's ``highImpactPairs:refresh`` endpoint and
    wraps the response in an Operation.

    Args:
        session: Session used to send the request
        project: Tamr Mastering project

    Returns:
        Operation built from the POST response by ``operation._from_response``
    """
    # Join with exactly one "/" so the endpoint is a child of the project URL
    # whether or not ``str(project.url)`` ends with a slash.
    endpoint = str(project.url).rstrip("/") + "/highImpactPairs:refresh"
    r = session.post(endpoint)
    return operation._from_response(project.url.instance, r)
|
||
|
||
def update_cluster_results(session: Session, project: MasteringProject) -> Operation:
    """Generates clusters based on the latest pair-matching model

    Sends a POST to the project's ``recordClusters:refresh`` endpoint and
    wraps the response in an Operation.

    Args:
        session: Session used to send the request
        project: Tamr Mastering project

    Returns:
        Operation built from the POST response by ``operation._from_response``
    """
    # Join with exactly one "/" so the endpoint is a child of the project URL
    # whether or not ``str(project.url)`` ends with a slash.
    endpoint = str(project.url).rstrip("/") + "/recordClusters:refresh"
    r = session.post(endpoint)
    return operation._from_response(project.url.instance, r)
|
||
|
||
def publish_clusters(session: Session, project: MasteringProject) -> Operation:
    """Publishes current record clusters

    Sends a POST to the project's ``publishedClustersWithData:refresh``
    endpoint and wraps the response in an Operation.

    Args:
        session: Session used to send the request
        project: Tamr Mastering project

    Returns:
        Operation built from the POST response by ``operation._from_response``
    """
    # Join with exactly one "/" so the endpoint is a child of the project URL
    # whether or not ``str(project.url)`` ends with a slash.
    endpoint = str(project.url).rstrip("/") + "/publishedClustersWithData:refresh"
    r = session.post(endpoint)
    return operation._from_response(project.url.instance, r)