Skip to content

Commit

Permalink
Merge pull request #440 from skalish/mastering-ops
Browse files Browse the repository at this point in the history
TC: Add functions for basic mastering workflow operations
  • Loading branch information
pcattori committed Aug 20, 2020
2 parents 146c883 + 8660040 commit 50f4cdb
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/beta/mastering.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Mastering

* [Mastering](/beta/mastering/mastering)
* [Project](/beta/mastering/project)
11 changes: 11 additions & 0 deletions docs/beta/mastering/mastering.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Mastering
=========

.. autofunction:: tamr_client.mastering.update_unified_dataset
.. autofunction:: tamr_client.mastering.estimate_pairs
.. autofunction:: tamr_client.mastering.generate_pairs
.. autofunction:: tamr_client.mastering.apply_feedback
.. autofunction:: tamr_client.mastering.update_pair_results
.. autofunction:: tamr_client.mastering.update_high_impact_pairs
.. autofunction:: tamr_client.mastering.update_cluster_results
.. autofunction:: tamr_client.mastering.publish_clusters
10 changes: 10 additions & 0 deletions tamr_client/mastering/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,13 @@
See https://docs.tamr.com/docs/overall-workflow-mastering
"""
from tamr_client.mastering import project
from tamr_client.mastering._mastering import (
apply_feedback,
estimate_pairs,
generate_pairs,
publish_clusters,
update_cluster_results,
update_high_impact_pairs,
update_pair_results,
update_unified_dataset,
)
89 changes: 89 additions & 0 deletions tamr_client/mastering/_mastering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""
Tamr - Mastering
See https://docs.tamr.com/docs/overall-workflow-mastering
The terminology used here is consistent with Tamr UI terminology
"""
from tamr_client import operation
from tamr_client._types import MasteringProject, Operation, Session
from tamr_client.dataset import unified


def update_unified_dataset(session: Session, project: MasteringProject) -> Operation:
    """Applies changes to the unified dataset of a Mastering project

    Looks up the project's unified dataset with ``unified.from_project`` and passes
    it to ``unified.apply_changes``, returning the operation that call produces.

    NOTE(review): whether this blocks until the operation completes is decided by
    ``unified.apply_changes``, not by this function -- confirm against that helper.

    Args:
        session: Tamr session used for the underlying requests
        project: Tamr Mastering project

    Returns:
        The operation returned by ``unified.apply_changes``
    """
    instance = project.url.instance
    return unified.apply_changes(
        session, unified.from_project(session, instance, project)
    )


def estimate_pairs(session: Session, project: MasteringProject) -> Operation:
    """Updates the estimated pair counts

    Sends a POST request to the project's ``estimatedPairCounts:refresh`` endpoint
    and builds an operation from the response. No explicit wait is performed here.

    Args:
        session: Tamr session used to send the request
        project: Tamr Mastering project

    Returns:
        The operation built from the response by ``operation._from_response``
    """
    # Join with exactly one "/" so the endpoint is a child path of the project URL,
    # whether or not the rendered project URL already ends in a slash.
    endpoint = str(project.url).rstrip("/") + "/estimatedPairCounts:refresh"
    r = session.post(endpoint)
    return operation._from_response(project.url.instance, r)


def generate_pairs(session: Session, project: MasteringProject) -> Operation:
    """Generates pairs according to the binning model

    Sends a POST request to the project's ``recordPairs:refresh`` endpoint and
    builds an operation from the response. No explicit wait is performed here.

    Args:
        session: Tamr session used to send the request
        project: Tamr Mastering project

    Returns:
        The operation built from the response by ``operation._from_response``
    """
    # Join with exactly one "/" so the endpoint is a child path of the project URL,
    # whether or not the rendered project URL already ends in a slash.
    endpoint = str(project.url).rstrip("/") + "/recordPairs:refresh"
    r = session.post(endpoint)
    return operation._from_response(project.url.instance, r)


def apply_feedback(session: Session, project: MasteringProject) -> Operation:
    """Trains the pair-matching model according to verified labels

    Sends a POST request to the project's
    ``recordPairsWithPredictions/model:refresh`` endpoint and builds an operation
    from the response. No explicit wait is performed here.

    Args:
        session: Tamr session used to send the request
        project: Tamr Mastering project

    Returns:
        The operation built from the response by ``operation._from_response``
    """
    # Join with exactly one "/" so the endpoint is a child path of the project URL,
    # whether or not the rendered project URL already ends in a slash.
    endpoint = (
        str(project.url).rstrip("/") + "/recordPairsWithPredictions/model:refresh"
    )
    r = session.post(endpoint)
    return operation._from_response(project.url.instance, r)


def update_pair_results(session: Session, project: MasteringProject) -> Operation:
    """Updates record pair predictions according to the latest pair-matching model

    Sends a POST request to the project's ``recordPairsWithPredictions:refresh``
    endpoint and builds an operation from the response. No explicit wait is
    performed here.

    Args:
        session: Tamr session used to send the request
        project: Tamr Mastering project

    Returns:
        The operation built from the response by ``operation._from_response``
    """
    # Join with exactly one "/" so the endpoint is a child path of the project URL,
    # whether or not the rendered project URL already ends in a slash.
    endpoint = str(project.url).rstrip("/") + "/recordPairsWithPredictions:refresh"
    r = session.post(endpoint)
    return operation._from_response(project.url.instance, r)


def update_high_impact_pairs(session: Session, project: MasteringProject) -> Operation:
    """Produces new high-impact pairs according to the latest pair-matching model

    Sends a POST request to the project's ``highImpactPairs:refresh`` endpoint and
    builds an operation from the response. No explicit wait is performed here.

    Args:
        session: Tamr session used to send the request
        project: Tamr Mastering project

    Returns:
        The operation built from the response by ``operation._from_response``
    """
    # Join with exactly one "/" so the endpoint is a child path of the project URL,
    # whether or not the rendered project URL already ends in a slash.
    endpoint = str(project.url).rstrip("/") + "/highImpactPairs:refresh"
    r = session.post(endpoint)
    return operation._from_response(project.url.instance, r)


def update_cluster_results(session: Session, project: MasteringProject) -> Operation:
    """Generates clusters based on the latest pair-matching model

    Sends a POST request to the project's ``recordClusters:refresh`` endpoint and
    builds an operation from the response. No explicit wait is performed here.

    Args:
        session: Tamr session used to send the request
        project: Tamr Mastering project

    Returns:
        The operation built from the response by ``operation._from_response``
    """
    # Join with exactly one "/" so the endpoint is a child path of the project URL,
    # whether or not the rendered project URL already ends in a slash.
    endpoint = str(project.url).rstrip("/") + "/recordClusters:refresh"
    r = session.post(endpoint)
    return operation._from_response(project.url.instance, r)


def publish_clusters(session: Session, project: MasteringProject) -> Operation:
    """Publishes current record clusters

    Sends a POST request to the project's ``publishedClustersWithData:refresh``
    endpoint and builds an operation from the response. No explicit wait is
    performed here.

    Args:
        session: Tamr session used to send the request
        project: Tamr Mastering project

    Returns:
        The operation built from the response by ``operation._from_response``
    """
    # Join with exactly one "/" so the endpoint is a child path of the project URL,
    # whether or not the rendered project URL already ends in a slash.
    endpoint = str(project.url).rstrip("/") + "/publishedClustersWithData:refresh"
    r = session.post(endpoint)
    return operation._from_response(project.url.instance, r)

0 comments on commit 50f4cdb

Please sign in to comment.