Skip to content

Commit

Permalink
Merge pull request #421 from keziah-tamr/transformations
Browse files Browse the repository at this point in the history
Transformations
  • Loading branch information
pcattori committed Jul 14, 2020
2 parents cce4cd4 + 1052727 commit a24c31d
Show file tree
Hide file tree
Showing 9 changed files with 283 additions and 2 deletions.
6 changes: 4 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
## 0.13.0-dev
**BETA**
**BETA**
Important: Do not use BETA features for production workflows.
- Added function to get operation from resource ID

- [#421](https://github.com/Datatamer/tamr-client/pull/421) Added functions for getting and replacing the transformations of a project via `tc.transformations.get_all()` and `tc.transformations.replace_all()`
- Added new dataclasses `Transformations` and `InputTransformation` to support these functions

**NEW FEATURES**
- [#383](https://github.com/Datatamer/tamr-client/issues/383) Now able to create an Operation from Job resource id

Expand Down
1 change: 1 addition & 0 deletions docs/beta.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,6 @@
* [Operation](beta/operation)
* [Primary Key](beta/primary_key)
* [Project](beta/project)
* [Transformations](beta/transformations)
* [Response](beta/response)
* [Session](beta/session)
5 changes: 5 additions & 0 deletions docs/beta/transformations.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Transformations
===============

.. autofunction:: tamr_client.transformations.get_all
.. autofunction:: tamr_client.transformations.replace_all
3 changes: 3 additions & 0 deletions tamr_client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@
Attribute,
AttributeType,
Dataset,
InputTransformation,
Instance,
MasteringProject,
Operation,
Project,
Session,
SubAttribute,
Transformations,
UnifiedDataset,
URL,
UsernamePasswordAuth,
Expand All @@ -44,6 +46,7 @@
from tamr_client import project
from tamr_client import response
from tamr_client import session
from tamr_client import transformations
from tamr_client.dataset import dataframe
from tamr_client.dataset import record
from tamr_client.exception import TamrClientException
1 change: 1 addition & 0 deletions tamr_client/_types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,5 @@
from tamr_client._types.operation import Operation
from tamr_client._types.project import MasteringProject, Project
from tamr_client._types.session import Session
from tamr_client._types.transformations import InputTransformation, Transformations
from tamr_client._types.url import URL
16 changes: 16 additions & 0 deletions tamr_client/_types/transformations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from dataclasses import dataclass, field
from typing import List

from tamr_client._types import Dataset


@dataclass(frozen=True)
class InputTransformation:
    """A transformation together with a list of datasets.

    Instances are frozen, so attributes cannot be reassigned after creation.
    The ``datasets`` list itself is still an ordinary mutable list.
    """

    # Text of the transformation, e.g. "SELECT *, 1 as one;"
    transformation: str
    # Datasets attached to this transformation; each instance gets its own
    # empty list by default. Presumably these are the input datasets the
    # transformation applies to -- TODO confirm against the Tamr API docs.
    datasets: List[Dataset] = field(default_factory=list)


@dataclass(frozen=True)
class Transformations:
    """Container for input-scoped and unified-scoped transformations.

    Instances are frozen, so attributes cannot be reassigned after creation.
    Both lists are still ordinary mutable lists.
    """

    # Input-scoped transformations, each carrying its own list of datasets;
    # each instance gets its own empty list by default.
    input_scope: List[InputTransformation] = field(default_factory=list)
    # Unified-scoped transformations, stored as plain transformation text;
    # each instance gets its own empty list by default.
    unified_scope: List[str] = field(default_factory=list)
122 changes: 122 additions & 0 deletions tamr_client/transformations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import requests

from tamr_client import dataset, response
from tamr_client._types import (
InputTransformation,
Instance,
JsonDict,
Project,
Session,
Transformations,
)


def _input_transformation_from_json(
    session: Session, instance: Instance, data: JsonDict
) -> InputTransformation:
    """Deserialize a single input-scoped transformation from JSON data

    Args:
        session: Session used to fetch every dataset the transformation references
        instance: Tamr instance containing this transformation
        data: Input scoped transformation JSON data from Tamr server

    Returns:
        The input transformation with its datasets fetched by resource ID
    """
    # Extract every resource ID first (the last path segment of "datasetId"),
    # so malformed JSON fails before any dataset is requested.
    resource_ids = []
    for entry in data["datasets"]:
        resource_ids.append(entry["datasetId"].rsplit("/", 1)[-1])

    fetched = []
    for resource_id in resource_ids:
        fetched.append(dataset.from_resource_id(session, instance, resource_id))

    return InputTransformation(transformation=data["transformation"], datasets=fetched)


def _from_json(session: Session, instance: Instance, data: JsonDict) -> Transformations:
    """Make transformations from JSON data (deserialize)

    Args:
        session: Session passed along to deserialize each input transformation
        instance: Tamr instance containing this transformation
        data: Transformation JSON data from Tamr server

    Returns:
        Transformations built from the "unified" and "parameterized" entries
        of `data`. The returned lists are new objects, so mutating them in
        place does not modify `data`.
    """
    return Transformations(
        # Copy rather than alias: callers mutate the returned lists in place
        # (e.g. `unified_scope.append(...)`), which must not leak into `data`.
        unified_scope=list(data["unified"]),
        input_scope=[
            _input_transformation_from_json(session, instance, tx)
            for tx in data["parameterized"]
        ],
    )


def _input_transformation_to_json(tx: InputTransformation) -> JsonDict:
    """Serialize a single input-scoped transformation to JSON data

    Args:
        tx: Input transformation to convert

    Returns:
        JSON data with a "datasets" list and the "transformation" text
    """
    serialized_datasets = []
    for ds in tx.datasets:
        # "datasetId" is left out on purpose: only one of "datasetId" or
        # "relativeDatasetId" is required.
        serialized_datasets.append({"name": ds.name, "relativeDatasetId": ds.url.path})

    return {"datasets": serialized_datasets, "transformation": tx.transformation}


def _to_json(tx: Transformations) -> JsonDict:
    """Serialize transformations to JSON data

    Args:
        tx: Transformations to convert

    Returns:
        JSON data holding the input-scoped transformations under
        "parameterized" and the unified-scoped ones under "unified"
    """
    parameterized = list(map(_input_transformation_to_json, tx.input_scope))
    return {"parameterized": parameterized, "unified": tx.unified_scope}


def get_all(session: Session, project: Project) -> Transformations:
    """Fetch the transformations of a Project

    Args:
        session: Session used to issue the request
        project: Project containing transformations

    Returns:
        The input-scoped and unified-scoped transformations of the project

    Raises:
        requests.HTTPError: If any HTTP error is encountered.

    Example:
        >>> import tamr_client as tc
        >>> session = tc.session.from_auth('username', 'password')
        >>> instance = tc.instance.Instance(host="localhost", port=9100)
        >>> project1 = tc.project.from_resource_id(session, instance, id='1')
        >>> print(tc.transformations.get_all(session, project1))
    """
    tx_response = session.get(f"{project.url}/transformations")
    # Check the status before touching the response body
    response.successful(tx_response)

    instance = project.url.instance
    payload = tx_response.json()
    return _from_json(session, instance, payload)


def replace_all(
    session: Session, project: Project, tx: Transformations
) -> requests.Response:
    """Replace every transformation of a Project with the given ones

    Args:
        session: Session used to issue the request
        project: Project to place transformations within
        tx: Transformations to put into project

    Returns:
        The response to the PUT request, after it has been checked for success

    Raises:
        requests.HTTPError: If any HTTP error is encountered.

    Example:
        >>> import tamr_client as tc
        >>> session = tc.session.from_auth('username', 'password')
        >>> instance = tc.instance.Instance(host="localhost", port=9100)
        >>> project1 = tc.project.from_resource_id(session, instance, id='1')
        >>> dataset3 = tc.dataset.from_resource_id(session, instance, id='3')
        >>> new_input_tx = tc.InputTransformation("SELECT *, upper(name) as name;", [dataset3])
        >>> all_tx = tc.Transformations(
        ...     input_scope=[new_input_tx],
        ...     unified_scope=["SELECT *, 1 as one;"]
        ... )
        >>> tc.transformations.replace_all(session, project1, all_tx)
    """
    payload = _to_json(tx)
    put_response = session.put(f"{project.url}/transformations", json=payload)
    return response.successful(put_response)
21 changes: 21 additions & 0 deletions tests/tamr_client/data/transformations.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"parameterized": [
{
"datasets": [],
"transformation": "SELECT *, 1 as one;"
},
{
"datasets": [
{
"name": "dataset 1 name",
"datasetId": "unify://unified-data/v1/datasets/1",
"relativeDatasetId": "datasets/1"
}
],
"transformation": "SELECT *, 2 as two;"
}
],
"unified": [
"//Comment\nSELECT *;"
]
}
110 changes: 110 additions & 0 deletions tests/tamr_client/test_transformations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import pytest
from requests import HTTPError
import responses

import tamr_client as tc
from tests.tamr_client import utils


@responses.activate
def test_get_all():
    """get_all returns both scopes of the mocked project's transformations."""
    # setup: mock the project, its transformations and the referenced dataset
    mocked_gets = [
        ("projects/1", "mastering_project.json"),
        ("projects/1/transformations", "transformations.json"),
        ("datasets/1", "dataset.json"),
    ]
    for path, fixture in mocked_gets:
        responses.add(
            responses.GET, str(tc.URL(path=path)), json=utils.load_json(fixture)
        )

    # test
    session = utils.session()
    instance = utils.instance()
    project = tc.project.from_resource_id(session, instance, "1")

    result = tc.transformations.get_all(session, project)
    input_txs = result.input_scope
    unified_txs = result.unified_scope

    assert len(input_txs) == 2
    assert len(unified_txs) == 1

    # First input transformation has no datasets attached
    without_datasets = input_txs[0]
    assert len(without_datasets.datasets) == 0
    assert without_datasets.transformation == "SELECT *, 1 as one;"

    # Second input transformation references exactly one dataset
    with_dataset = input_txs[1]
    assert len(with_dataset.datasets) == 1
    assert with_dataset.datasets[0].name == "dataset 1 name"
    assert with_dataset.transformation == "SELECT *, 2 as two;"

    assert unified_txs[0] == "//Comment\nSELECT *;"


@responses.activate
def test_replace_all():
    """replace_all PUTs the serialized transformations and returns the response."""
    import json  # local import: only needed to decode the recorded request body

    # setup
    project_json = utils.load_json("mastering_project.json")
    project_url = tc.URL(path="projects/1")
    responses.add(responses.GET, str(project_url), json=project_json)

    tx_json = utils.load_json("transformations.json")
    tx_url = tc.URL(path="projects/1/transformations")
    responses.add(responses.GET, str(tx_url), json=tx_json)

    dataset_json = utils.load_json("dataset.json")
    dataset_url = tc.URL(path="datasets/1")
    responses.add(responses.GET, str(dataset_url), json=dataset_json)

    # test
    s = utils.session()
    instance = utils.instance()
    project = tc.project.from_resource_id(s, instance, "1")

    # Start from the fixture, then add a unified transformation and drop the
    # second input transformation so the payload differs from the fixture.
    transforms = tc.transformations._from_json(s, instance, tx_json)
    transforms.unified_scope.append("//extra TX")
    transforms.input_scope.pop(1)

    expected_body = tc.transformations._to_json(transforms)
    # The mocked server echoes the replaced transformations back
    responses.add(responses.PUT, str(tx_url), json=expected_body)

    r = tc.transformations.replace_all(s, project, transforms)

    # The echoed response alone cannot show what was sent, so also check the
    # body of the recorded PUT request.
    put_requests = [
        call.request for call in responses.calls if call.request.method == "PUT"
    ]
    assert len(put_requests) == 1
    assert json.loads(put_requests[0].body) == expected_body

    posted_tx = tc.transformations._from_json(s, project.url.instance, r.json())

    assert len(posted_tx.input_scope) == 1
    assert len(posted_tx.unified_scope) == 2

    assert len(posted_tx.input_scope[0].datasets) == 0
    assert posted_tx.input_scope[0].transformation == "SELECT *, 1 as one;"

    assert posted_tx.unified_scope[0] == "//Comment\nSELECT *;"
    assert posted_tx.unified_scope[1] == "//extra TX"


@responses.activate
def test_replace_all_errors():
    """replace_all raises HTTPError when the server rejects the PUT."""
    # setup: mock the project, its transformations and the referenced dataset
    tx_path = "projects/1/transformations"
    payloads = {
        "projects/1": utils.load_json("mastering_project.json"),
        tx_path: utils.load_json("transformations.json"),
        "datasets/1": utils.load_json("dataset.json"),
    }
    for path, payload in payloads.items():
        responses.add(responses.GET, str(tc.URL(path=path)), json=payload)

    # test
    session = utils.session()
    instance = utils.instance()
    project = tc.project.from_resource_id(session, instance, "1")

    transforms = tc.transformations._from_json(session, instance, payloads[tx_path])

    # The server answers the PUT with a client error
    responses.add(responses.PUT, str(tc.URL(path=tx_path)), status=400)

    with pytest.raises(HTTPError):
        tc.transformations.replace_all(session, project, transforms)

0 comments on commit a24c31d

Please sign in to comment.