Skip to content

Commit

Permalink
Merge pull request #435 from abafzal/create_project
Browse files Browse the repository at this point in the history
Create projects in Tamr
  • Loading branch information
pcattori committed Aug 17, 2020
2 parents 4ec7db6 + ba3b355 commit d8755d1
Show file tree
Hide file tree
Showing 15 changed files with 261 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
**NEW FEATURES**
- [#383](https://github.com/Datatamer/tamr-client/issues/383) Now able to create an Operation from Job resource id
- [#425](https://github.com/Datatamer/tamr-client/pull/425) Now able to get, update and delete manual labels for Categorization projects
- [#435](https://github.com/Datatamer/tamr-client/pull/435) Now able to create projects of the following type in Tamr: Categorization, Mastering, Schema Mapping

## 0.12.0
**BETA**
Expand Down
1 change: 1 addition & 0 deletions docs/beta.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* [Operation](beta/operation)
* [Primary Key](beta/primary_key)
* [Project](beta/project)
* [Schema Mapping](beta/schema_mapping)
* [Transformations](beta/transformations)
* [Response](beta/response)
* [Session](beta/session)
2 changes: 1 addition & 1 deletion docs/beta/categorization.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# Categoriation
# Categorization

* [Project](/beta/categorization/project)
1 change: 1 addition & 0 deletions docs/beta/categorization/project.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ Categorization Project

.. autoclass:: tamr_client.CategorizationProject

.. autofunction:: tamr_client.categorization.project.create
.. autofunction:: tamr_client.categorization.project.manual_labels
2 changes: 2 additions & 0 deletions docs/beta/mastering/project.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@ Mastering Project
=================

.. autoclass:: tamr_client.MasteringProject

.. autofunction:: tamr_client.mastering.project.create
3 changes: 3 additions & 0 deletions docs/beta/schema_mapping.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Schema Mapping

* [Project](/beta/schema_mapping/project)
6 changes: 6 additions & 0 deletions docs/beta/schema_mapping/project.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Schema Mapping Project
======================

.. autoclass:: tamr_client.SchemaMappingProject

.. autofunction:: tamr_client.schema_mapping.project.create
2 changes: 2 additions & 0 deletions tamr_client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
MasteringProject,
Operation,
Project,
SchemaMappingProject,
Session,
SubAttribute,
Transformations,
Expand All @@ -47,6 +48,7 @@
from tamr_client import primary_key
from tamr_client import project
from tamr_client import response
from tamr_client import schema_mapping
from tamr_client import session
from tamr_client import transformations
from tamr_client.dataset import dataframe
Expand Down
7 changes: 6 additions & 1 deletion tamr_client/_types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@
from tamr_client._types.instance import Instance
from tamr_client._types.json import JsonDict
from tamr_client._types.operation import Operation
from tamr_client._types.project import CategorizationProject, MasteringProject, Project
from tamr_client._types.project import (
CategorizationProject,
MasteringProject,
Project,
SchemaMappingProject,
)
from tamr_client._types.session import Session
from tamr_client._types.transformations import InputTransformation, Transformations
from tamr_client._types.url import URL
23 changes: 20 additions & 3 deletions tamr_client/_types/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,23 @@
from tamr_client._types.url import URL


@dataclass(frozen=True)
class CategorizationProject:
    """A Tamr Categorization project.

    Immutable (frozen) record describing a single Categorization project.

    See https://docs.tamr.com/reference/the-project-object

    Args:
        url: URL of the project (presumably its location on the Tamr
            server -- confirm against the code that builds these objects)
        name: Project name
        description: Project description; ``None`` when not provided
    """

    url: URL
    name: str
    description: Optional[str] = None


@dataclass(frozen=True)
class MasteringProject:
"""A Tamr Mastering project
Expand All @@ -22,8 +39,8 @@ class MasteringProject:


@dataclass(frozen=True)
class CategorizationProject:
"""A Tamr Categorization project
class SchemaMappingProject:
"""A Tamr Schema Mapping project
See https://docs.tamr.com/reference/the-project-object
Expand All @@ -38,4 +55,4 @@ class CategorizationProject:
description: Optional[str] = None


Project = Union[MasteringProject, CategorizationProject]
Project = Union[CategorizationProject, MasteringProject, SchemaMappingProject]
42 changes: 41 additions & 1 deletion tamr_client/categorization/project.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from typing import Optional

from tamr_client import project
from tamr_client._types import (
CategorizationProject,
Dataset,
Instance,
JsonDict,
Project,
Session,
URL,
)
Expand All @@ -21,10 +25,46 @@ def _from_json(url: URL, data: JsonDict) -> CategorizationProject:
)


def create(
    session: Session,
    instance: Instance,
    name: str,
    description: Optional[str] = None,
    external_id: Optional[str] = None,
    unified_dataset_name: Optional[str] = None,
) -> Project:
    """Create a Categorization project in Tamr.

    Thin wrapper around ``project._create`` that pins the project type to
    ``"CATEGORIZATION"``.

    Args:
        session: Session used to send the request
        instance: Tamr instance
        name: Project name
        description: Project description
        external_id: External ID of the project
        unified_dataset_name: Unified dataset name. If None, will be set to
            project name + '_unified_dataset'

    Returns:
        Project created in Tamr

    Raises:
        AlreadyExists: If a project with these specifications already exists
        requests.HTTPError: If any other HTTP error is encountered
    """
    # The first four arguments follow the positional order of
    # project._create: session, instance, name, project_type.
    created = project._create(
        session,
        instance,
        name,
        "CATEGORIZATION",
        description=description,
        external_id=external_id,
        unified_dataset_name=unified_dataset_name,
    )
    return created


def manual_labels(
session: Session, instance: Instance, project: CategorizationProject
) -> Dataset:
"""Get manual labels from a Categorization project
"""Get manual labels from a Categorization project.
Args:
instance: Tamr instance containing project
project: Tamr project containing labels
Expand Down
47 changes: 46 additions & 1 deletion tamr_client/mastering/project.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
from tamr_client._types import JsonDict, MasteringProject, URL
from typing import Optional

from tamr_client import project
from tamr_client._types import (
Instance,
JsonDict,
MasteringProject,
Project,
Session,
URL,
)


def _from_json(url: URL, data: JsonDict) -> MasteringProject:
Expand All @@ -9,3 +19,38 @@ def _from_json(url: URL, data: JsonDict) -> MasteringProject:
data: Project JSON data from Tamr server
"""
return MasteringProject(url, name=data["name"], description=data.get("description"))


def create(
    session: Session,
    instance: Instance,
    name: str,
    description: Optional[str] = None,
    external_id: Optional[str] = None,
    unified_dataset_name: Optional[str] = None,
) -> Project:
    """Create a Mastering project in Tamr.

    Thin wrapper around ``project._create`` that pins the project type to
    ``"DEDUP"``, the type string this module sends for Mastering projects.

    Args:
        session: Session used to send the request
        instance: Tamr instance
        name: Project name
        description: Project description
        external_id: External ID of the project
        unified_dataset_name: Unified dataset name. If None, will be set to
            project name + '_unified_dataset'

    Returns:
        Project created in Tamr

    Raises:
        AlreadyExists: If a project with these specifications already exists.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    # The first four arguments follow the positional order of
    # project._create: session, instance, name, project_type.
    created = project._create(
        session,
        instance,
        name,
        "DEDUP",
        description=description,
        external_id=external_id,
        unified_dataset_name=unified_dataset_name,
    )
    return created
72 changes: 68 additions & 4 deletions tamr_client/project.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
from typing import Optional

from tamr_client import response
from tamr_client._types import Instance, JsonDict, Project, Session, URL
from tamr_client.categorization import project as categorization_project
from tamr_client.exception import TamrClientException
from tamr_client.mastering import project as mastering_project
from tamr_client.schema_mapping import project as schema_mapping_project


class NotFound(TamrClientException):
Expand All @@ -12,12 +15,20 @@ class NotFound(TamrClientException):
pass


class AlreadyExists(TamrClientException):
    """Raised when trying to create a project that already exists in Tamr.

    Corresponds to a 409 HTTP error.
    """


def from_resource_id(session: Session, instance: Instance, id: str) -> Project:
"""Get project by resource ID
Fetches project from Tamr server
"""Get project by resource ID.
Fetches project from Tamr server.
Args:
instance: Tamr instance containing this project
id: Project ID
Raises:
project.NotFound: If no project could be found at the specified URL.
Corresponds to a 404 HTTP error.
Expand All @@ -28,10 +39,12 @@ def from_resource_id(session: Session, instance: Instance, id: str) -> Project:


def _from_url(session: Session, url: URL) -> Project:
"""Get project by URL
Fetches project from Tamr server
"""Get project by URL.
Fetches project from Tamr server.
Args:
url: Project URL
Raises:
NotFound: If no project could be found at the specified URL.
Corresponds to a 404 HTTP error.
Expand All @@ -55,5 +68,56 @@ def _from_json(url: URL, data: JsonDict) -> Project:
return mastering_project._from_json(url, data)
elif proj_type == "CATEGORIZATION":
return categorization_project._from_json(url, data)
elif proj_type == "SCHEMA_MAPPING_RECOMMENDATIONS":
return schema_mapping_project._from_json(url, data)
else:
raise ValueError(f"Unrecognized project type '{proj_type}' in {repr(data)}")


def _create(
    session: Session,
    instance: Instance,
    name: str,
    project_type: str,
    description: Optional[str] = None,
    external_id: Optional[str] = None,
    unified_dataset_name: Optional[str] = None,
) -> Project:
    """Create a project in Tamr.

    Posts the project definition to the ``projects`` endpoint of the
    instance, then fetches the newly created project by the ``relativeId``
    returned in the response.

    Args:
        session: Session used to send the requests
        instance: Tamr instance
        name: Project name
        project_type: Project type, sent as the ``type`` field of the request
        description: Project description
        external_id: External ID of the project
        unified_dataset_name: Name of the unified dataset. If None or empty,
            defaults to project name + '_unified_dataset'

    Returns:
        Project created in Tamr

    Raises:
        AlreadyExists: If a project with these specifications already exists.
            Corresponds to a 409 HTTP error.
        requests.HTTPError: If any other HTTP error is encountered.
    """
    if not unified_dataset_name:
        unified_dataset_name = name + "_unified_dataset"
    payload = {
        "name": name,
        "type": project_type,
        "unifiedDatasetName": unified_dataset_name,
        "description": description,
        "externalId": external_id,
    }

    projects_url = URL(instance=instance, path="projects")
    r = session.post(url=str(projects_url), json=payload)

    if r.status_code == 409:
        # The conflict body may not be JSON, or may lack a "message" key.
        # Fall back to the raw response text so callers still receive
        # AlreadyExists instead of an unrelated parsing error.
        try:
            message = r.json()["message"]
        except (ValueError, KeyError, TypeError):
            message = r.text
        raise AlreadyExists(message)

    created = response.successful(r).json()
    project_url = URL(instance=instance, path=str(created["relativeId"]))

    return _from_url(session=session, url=project_url)
5 changes: 5 additions & 0 deletions tamr_client/schema_mapping/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""
Tamr - Schema Mapping
See https://docs.tamr.com/new/docs/overall-workflow-schema
"""
from tamr_client.schema_mapping import project

0 comments on commit d8755d1

Please sign in to comment.