-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #421 from keziah-tamr/transformations
Transformations
- Loading branch information
Showing
9 changed files
with
283 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
Transformations | ||
=============== | ||
|
||
.. autofunction:: tamr_client.transformations.get_all | ||
.. autofunction:: tamr_client.transformations.replace_all |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
from dataclasses import dataclass, field | ||
from typing import List | ||
|
||
from tamr_client._types import Dataset | ||
|
||
|
||
# frozen=True only blocks attribute reassignment; the `datasets` list itself
# can still be mutated in place.
@dataclass(frozen=True)
class InputTransformation:
    """A transformation together with the datasets it is associated with.

    Attributes:
        transformation: Text of the transformation
        datasets: Datasets associated with the transformation. Defaults to a
            new empty list for each instance.
    """

    transformation: str
    datasets: List[Dataset] = field(default_factory=list)
|
||
|
||
# frozen=True only blocks attribute reassignment; both lists can still be
# mutated in place.
@dataclass(frozen=True)
class Transformations:
    """Transformations grouped into an input scope and a unified scope.

    Attributes:
        input_scope: Input scoped transformations. Defaults to a new empty
            list for each instance.
        unified_scope: Unified scope transformations, each held as a string.
            Defaults to a new empty list for each instance.
    """

    input_scope: List[InputTransformation] = field(default_factory=list)
    unified_scope: List[str] = field(default_factory=list)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
import requests | ||
|
||
from tamr_client import dataset, response | ||
from tamr_client._types import ( | ||
InputTransformation, | ||
Instance, | ||
JsonDict, | ||
Project, | ||
Session, | ||
Transformations, | ||
) | ||
|
||
|
||
def _input_transformation_from_json(
    session: Session, instance: Instance, data: JsonDict
) -> InputTransformation:
    """Make input transformation from JSON data (deserialize)

    Every entry of ``data["datasets"]`` is resolved to a dataset by passing
    the last path segment of its identifier to ``dataset.from_resource_id``.

    Args:
        session: Session passed to the dataset lookups
        instance: Tamr instance containing this transformation
        data: Input scoped transformation JSON data from Tamr server

    Raises:
        KeyError: If `data` lacks "datasets" or "transformation", or if a
            dataset entry has neither "datasetId" nor "relativeDatasetId".
    """
    # Payloads produced by `_input_transformation_to_json` carry only
    # "relativeDatasetId", so fall back to it when "datasetId" is absent.
    # Both forms end in the dataset's resource id.
    dataset_resource_ids = [
        (d.get("datasetId") or d["relativeDatasetId"]).split("/")[-1]
        for d in data["datasets"]
    ]
    datasets = [
        dataset.from_resource_id(session, instance, d_id)
        for d_id in dataset_resource_ids
    ]
    return InputTransformation(transformation=data["transformation"], datasets=datasets)
|
||
|
||
def _from_json(session: Session, instance: Instance, data: JsonDict) -> Transformations:
    """Deserialize transformations from JSON data

    Args:
        session: Session forwarded to the input transformation deserializer
        instance: Tamr instance containing these transformations
        data: Transformation JSON data from Tamr server. Its "unified" value
            becomes the unified scope and each "parameterized" entry becomes
            one input scoped transformation.
    """
    # Read "unified" first so a missing key fails before any entry is resolved.
    unified = data["unified"]
    input_scoped = []
    for raw_tx in data["parameterized"]:
        input_scoped.append(_input_transformation_from_json(session, instance, raw_tx))
    return Transformations(unified_scope=unified, input_scope=input_scoped)
|
||
|
||
def _input_transformation_to_json(tx: InputTransformation) -> JsonDict:
    """Serialize a single input transformation to JSON data

    Args:
        tx: Input transformation to convert

    Returns:
        A dict with a "datasets" list (one "name"/"relativeDatasetId" entry
        per dataset) and the "transformation" text.
    """
    dataset_entries = []
    for ds in tx.datasets:
        # "datasetId" is left out on purpose: only one of "datasetId" or
        # "relativeDatasetId" is required
        dataset_entries.append({"name": ds.name, "relativeDatasetId": ds.url.path})

    return {"datasets": dataset_entries, "transformation": tx.transformation}
|
||
|
||
def _to_json(tx: Transformations) -> JsonDict:
    """Serialize transformations to JSON data

    Args:
        tx: Transformations to convert

    Returns:
        A dict whose "parameterized" value holds the serialized input scope
        and whose "unified" value holds the unified scope.
    """
    parameterized = list(map(_input_transformation_to_json, tx.input_scope))
    return {"parameterized": parameterized, "unified": tx.unified_scope}
|
||
|
||
def get_all(session: Session, project: Project) -> Transformations:
    """Get the transformations of a Project

    Args:
        session: Session used to issue the request
        project: Project containing transformations

    Returns:
        The transformations deserialized from the response body

    Raises:
        requests.HTTPError: If any HTTP error is encountered.

    Example:
        >>> import tamr_client as tc
        >>> session = tc.session.from_auth('username', 'password')
        >>> instance = tc.instance.Instance(host="localhost", port=9100)
        >>> project1 = tc.project.from_resource_id(session, instance, id='1')
        >>> print(tc.transformations.get_all(session, project1))
    """
    endpoint = f"{project.url}/transformations"
    resp = session.get(endpoint)
    response.successful(resp)
    # Datasets referenced by the payload are looked up on the project's instance.
    return _from_json(session, project.url.instance, resp.json())
|
||
|
||
def replace_all(
    session: Session, project: Project, tx: Transformations
) -> requests.Response:
    """Replaces the transformations of a Project

    Args:
        session: Session used to issue the request
        project: Project to place transformations within
        tx: Transformations to put into project

    Returns:
        The response to the PUT request, as returned by `response.successful`

    Raises:
        requests.HTTPError: If any HTTP error is encountered.

    Example:
        >>> import tamr_client as tc
        >>> session = tc.session.from_auth('username', 'password')
        >>> instance = tc.instance.Instance(host="localhost", port=9100)
        >>> project1 = tc.project.from_resource_id(session, instance, id='1')
        >>> dataset3 = tc.dataset.from_resource_id(session, instance, id='3')
        >>> new_input_tx = tc.InputTransformation("SELECT *, upper(name) as name;", [dataset3])
        >>> all_tx = tc.Transformations(
        ...     input_scope=[new_input_tx],
        ...     unified_scope=["SELECT *, 1 as one;"]
        ... )
        >>> tc.transformations.replace_all(session, project1, all_tx)
    """
    payload = _to_json(tx)
    endpoint = f"{project.url}/transformations"
    resp = session.put(endpoint, json=payload)
    return response.successful(resp)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
{ | ||
"parameterized": [ | ||
{ | ||
"datasets": [], | ||
"transformation": "SELECT *, 1 as one;" | ||
}, | ||
{ | ||
"datasets": [ | ||
{ | ||
"name": "dataset 1 name", | ||
"datasetId": "unify://unified-data/v1/datasets/1", | ||
"relativeDatasetId": "datasets/1" | ||
} | ||
], | ||
"transformation": "SELECT *, 2 as two;" | ||
} | ||
], | ||
"unified": [ | ||
"//Comment\nSELECT *;" | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
import pytest | ||
from requests import HTTPError | ||
import responses | ||
|
||
import tamr_client as tc | ||
from tests.tamr_client import utils | ||
|
||
|
||
@responses.activate
def test_get_all():
    """get_all returns both the input scoped and unified transformations."""
    # setup: mock the project, its transformations and the referenced dataset
    fixtures_by_path = {
        "projects/1": "mastering_project.json",
        "projects/1/transformations": "transformations.json",
        "datasets/1": "dataset.json",
    }
    for path, fixture in fixtures_by_path.items():
        responses.add(
            responses.GET, str(tc.URL(path=path)), json=utils.load_json(fixture)
        )

    # test
    session = utils.session()
    instance = utils.instance()
    project = tc.project.from_resource_id(session, instance, "1")

    result = tc.transformations.get_all(session, project)

    assert len(result.input_scope) == 2
    assert len(result.unified_scope) == 1

    # the first input transformation has no datasets, the second has one
    without_datasets, with_dataset = result.input_scope

    assert len(without_datasets.datasets) == 0
    assert without_datasets.transformation == "SELECT *, 1 as one;"
    assert len(with_dataset.datasets) == 1
    assert with_dataset.datasets[0].name == "dataset 1 name"
    assert with_dataset.transformation == "SELECT *, 2 as two;"

    assert result.unified_scope[0] == "//Comment\nSELECT *;"
|
||
|
||
@responses.activate
def test_replace_all():
    """replace_all PUTs the serialized transformations and returns the response."""
    # setup
    responses.add(
        responses.GET,
        str(tc.URL(path="projects/1")),
        json=utils.load_json("mastering_project.json"),
    )

    tx_json = utils.load_json("transformations.json")
    tx_endpoint = str(tc.URL(path="projects/1/transformations"))
    responses.add(responses.GET, tx_endpoint, json=tx_json)

    responses.add(
        responses.GET,
        str(tc.URL(path="datasets/1")),
        json=utils.load_json("dataset.json"),
    )

    # test
    session = utils.session()
    instance = utils.instance()
    project = tc.project.from_resource_id(session, instance, "1")

    to_send = tc.transformations._from_json(session, instance, tx_json)
    to_send.unified_scope.append("//extra TX")
    # Drop the dataset-bearing input transformation, so the payload echoed
    # below contains no dataset entries.
    to_send.input_scope.pop(1)

    # The mocked PUT endpoint echoes the serialized payload back.
    echoed_body = tc.transformations._to_json(to_send)
    responses.add(responses.PUT, tx_endpoint, json=echoed_body)

    put_response = tc.transformations.replace_all(session, project, to_send)

    posted_tx = tc.transformations._from_json(
        session, project.url.instance, put_response.json()
    )

    assert len(posted_tx.input_scope) == 1
    assert len(posted_tx.unified_scope) == 2

    remaining_tx = posted_tx.input_scope[0]
    assert len(remaining_tx.datasets) == 0
    assert remaining_tx.transformation == "SELECT *, 1 as one;"

    assert posted_tx.unified_scope[0] == "//Comment\nSELECT *;"
    assert posted_tx.unified_scope[1] == "//extra TX"
|
||
|
||
@responses.activate
def test_replace_all_errors():
    """replace_all raises HTTPError when the PUT request is answered with a 400."""
    # setup
    responses.add(
        responses.GET,
        str(tc.URL(path="projects/1")),
        json=utils.load_json("mastering_project.json"),
    )

    tx_json = utils.load_json("transformations.json")
    tx_endpoint = str(tc.URL(path="projects/1/transformations"))
    responses.add(responses.GET, tx_endpoint, json=tx_json)

    responses.add(
        responses.GET,
        str(tc.URL(path="datasets/1")),
        json=utils.load_json("dataset.json"),
    )

    # test
    session = utils.session()
    instance = utils.instance()
    project = tc.project.from_resource_id(session, instance, "1")

    to_send = tc.transformations._from_json(session, instance, tx_json)

    # the mocked PUT endpoint rejects the request
    responses.add(responses.PUT, tx_endpoint, status=400)

    with pytest.raises(HTTPError):
        tc.transformations.replace_all(session, project, to_send)