Skip to content

Commit

Permalink
Merge pull request #262 from juliamcclellan/data_builder
Browse files Browse the repository at this point in the history
Attribute configuration and dataset specs
  • Loading branch information
nbateshaus committed Aug 15, 2019
2 parents aacb671 + 88c91b0 commit ae34775
Show file tree
Hide file tree
Showing 6 changed files with 355 additions and 12 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
- [#218](https://github.com/Datatamer/unify-client-python/issues/218) Delete a `BaseResource`
- [#233](https://github.com/Datatamer/unify-client-python/issues/233) Remove an input dataset from a project
- [#67](https://github.com/Datatamer/unify-client-python/issues/67) Create a dataset from a pandas `DataFrame`
- [#222](https://github.com/Datatamer/unify-client-python/issues/222) Dataset spec to update an existing dataset
- [#225](https://github.com/Datatamer/unify-client-python/issues/225) Attribute configuration spec to update an existing attribute configuration

**BUG FIXES**
- [#235](https://github.com/Datatamer/unify-client-python/issues/235) Making `AttributeCollection` retrieve attributes directly instead of by streaming
Expand Down
12 changes: 12 additions & 0 deletions docs/developer-interface.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ Dataset
.. autoclass:: tamr_unify_client.dataset.resource.Dataset
:members:

Dataset Spec
^^^^^^^^^^^^

.. autoclass:: tamr_unify_client.dataset.resource.DatasetSpec
:members:

Dataset Collection
^^^^^^^^^^^^^^^^^^

Expand Down Expand Up @@ -200,6 +206,12 @@ Attribute Configuration
.. autoclass:: tamr_unify_client.project.attribute_configuration.resource.AttributeConfiguration
:members:

Attribute Configuration Spec
""""""""""""""""""""""""""""

.. autoclass:: tamr_unify_client.project.attribute_configuration.resource.AttributeConfigurationSpec
:members:

Attribute Configuration Collection
""""""""""""""""""""""""""""""""""

Expand Down
95 changes: 95 additions & 0 deletions tamr_unify_client/dataset/resource.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from copy import deepcopy

import simplejson as json

from tamr_unify_client.attribute.collection import AttributeCollection
Expand Down Expand Up @@ -262,6 +264,14 @@ def upstream_datasets(self):

return [DatasetURI(self.client, uri) for uri in resources]

def spec(self):
    """Build a spec from this dataset.

    The spec is produced by :func:`DatasetSpec.of`, which works on a deep
    copy of this dataset's data.

    :return: The spec of this dataset.
    :rtype: :class:`~tamr_unify_client.dataset.resource.DatasetSpec`
    """
    dataset_spec = DatasetSpec.of(self)
    return dataset_spec

@property
def __geo_interface__(self):
"""Retrieve a representation of this dataset that conforms to the Python Geo Interface.
Expand Down Expand Up @@ -434,3 +444,88 @@ def _geo_attr_names():
"polygon",
"multiPolygon",
}


class DatasetSpec:
    """A representation of the server view of a dataset.

    Each ``with_*`` method returns a new spec and leaves this one unchanged;
    call :func:`put` to send the spec's data to the server.
    """

    def __init__(self, client, data, api_path):
        # The data dict is stored as given (no copy is made here); ``of`` and
        # ``to_dict`` are the places where deep copies are taken.
        self.client = client
        self._data = data
        self.api_path = api_path

    @staticmethod
    def of(resource):
        """Creates a dataset spec from a dataset.

        The dataset's data is deep-copied, so the spec does not share state
        with the dataset it was created from.

        :param resource: The existing dataset.
        :type resource: :class:`~tamr_unify_client.dataset.resource.Dataset`
        :return: The corresponding dataset spec.
        :rtype: :class:`~tamr_unify_client.dataset.resource.DatasetSpec`
        """
        return DatasetSpec(resource.client, deepcopy(resource._data), resource.api_path)

    def from_data(self, data):
        """Creates a spec with the same client and API path as this one, but new data.

        :param data: The data for the new spec.
        :type data: dict
        :return: The new spec.
        :rtype: :class:`~tamr_unify_client.dataset.resource.DatasetSpec`
        """
        return DatasetSpec(self.client, data, self.api_path)

    def to_dict(self):
        """Returns a version of this spec that conforms to the API representation.

        The returned dict is a deep copy; mutating it does not affect this spec.

        :returns: The spec's dict.
        :rtype: dict
        """
        return deepcopy(self._data)

    def with_external_id(self, new_external_id):
        """Creates a new spec with the same properties, updating external ID.

        :param new_external_id: The new external ID.
        :type new_external_id: str
        :return: A new spec.
        :rtype: :class:`~tamr_unify_client.dataset.resource.DatasetSpec`
        """
        return self.from_data({**self._data, "externalId": new_external_id})

    def with_description(self, new_description):
        """Creates a new spec with the same properties, updating description.

        :param new_description: The new description.
        :type new_description: str
        :return: A new spec.
        :rtype: :class:`~tamr_unify_client.dataset.resource.DatasetSpec`
        """
        return self.from_data({**self._data, "description": new_description})

    def with_tags(self, new_tags):
        """Creates a new spec with the same properties, updating tags.

        :param new_tags: The new tags.
        :type new_tags: list[str]
        :return: A new spec.
        :rtype: :class:`~tamr_unify_client.dataset.resource.DatasetSpec`
        """
        return self.from_data({**self._data, "tags": new_tags})

    def put(self):
        """Updates the dataset on the server.

        Sends this spec's data in a PUT request to ``api_path`` and builds a
        dataset from the JSON body of the response.

        :return: The modified dataset.
        :rtype: :class:`~tamr_unify_client.dataset.resource.Dataset`
        """
        new_data = self.client.put(self.api_path, json=self._data).successful().json()
        return Dataset.from_json(self.client, new_data, self.api_path)

    def __repr__(self):
        # Use ``.get`` rather than indexing: a spec built through ``from_data``
        # may hold partial data, and ``repr`` must not raise KeyError for it.
        return (
            f"{self.__class__.__module__}."
            f"{self.__class__.__qualname__}("
            f"relative_id={self._data.get('relativeId')!r}, "
            f"name={self._data.get('name')!r})"
        )
128 changes: 128 additions & 0 deletions tamr_unify_client/project/attribute_configuration/resource.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from copy import deepcopy

from tamr_unify_client.base_resource import BaseResource


Expand Down Expand Up @@ -58,6 +60,14 @@ def attribute_name(self):
""":type: str"""
return self._data.get("attributeName")

def spec(self):
    """Build a spec from this attribute configuration.

    The spec is produced by :func:`AttributeConfigurationSpec.of`, which
    works on a deep copy of this attribute configuration's data.

    :return: The spec of this attribute configuration.
    :rtype: :class:`~tamr_unify_client.project.attribute_configuration.resource.AttributeConfigurationSpec`
    """
    configuration_spec = AttributeConfigurationSpec.of(self)
    return configuration_spec

def __repr__(self):
return (
f"{self.__class__.__module__}."
Expand All @@ -72,3 +82,121 @@ def __repr__(self):
f"numeric_field_resolution={self.numeric_field_resolution!r}, "
f"attribute_name={self.attribute_name!r})"
)


class AttributeConfigurationSpec:
    """A representation of the server view of an attribute configuration.

    Each ``with_*`` method returns a new spec and leaves this one unchanged;
    call :func:`put` to send the spec's data to the server.
    """

    def __init__(self, client, data, api_path):
        # The data dict is stored as given (no copy is made here); ``of`` and
        # ``to_dict`` are the places where deep copies are taken.
        self.client = client
        self._data = data
        self.api_path = api_path

    @staticmethod
    def of(resource):
        """Creates an attribute configuration spec from an attribute configuration.

        The attribute configuration's data is deep-copied, so the spec does
        not share state with the resource it was created from.

        :param resource: The existing attribute configuration.
        :type resource: :class:`~tamr_unify_client.project.attribute_configuration.resource.AttributeConfiguration`
        :return: The corresponding attribute configuration spec.
        :rtype: :class:`~tamr_unify_client.project.attribute_configuration.resource.AttributeConfigurationSpec`
        """
        return AttributeConfigurationSpec(
            resource.client, deepcopy(resource._data), resource.api_path
        )

    def from_data(self, data):
        """Creates a spec with the same client and API path as this one, but new data.

        :param data: The data for the new spec.
        :type data: dict
        :return: The new spec.
        :rtype: :class:`~tamr_unify_client.project.attribute_configuration.resource.AttributeConfigurationSpec`
        """
        return AttributeConfigurationSpec(self.client, data, self.api_path)

    def to_dict(self):
        """Returns a version of this spec that conforms to the API representation.

        The returned dict is a deep copy; mutating it does not affect this spec.

        :returns: The spec's dict.
        :rtype: dict
        """
        return deepcopy(self._data)

    def with_attribute_role(self, new_attribute_role):
        """Creates a new spec with the same properties, updating attribute role.

        :param new_attribute_role: The new attribute role.
        :type new_attribute_role: str
        :return: A new spec.
        :rtype: :class:`~tamr_unify_client.project.attribute_configuration.resource.AttributeConfigurationSpec`
        """
        return self.from_data({**self._data, "attributeRole": new_attribute_role})

    def with_similarity_function(self, new_similarity_function):
        """Creates a new spec with the same properties, updating similarity function.

        :param new_similarity_function: The new similarity function.
        :type new_similarity_function: str
        :return: A new spec.
        :rtype: :class:`~tamr_unify_client.project.attribute_configuration.resource.AttributeConfigurationSpec`
        """
        return self.from_data(
            {**self._data, "similarityFunction": new_similarity_function}
        )

    def with_enabled_for_ml(self, new_enabled_for_ml):
        """Creates a new spec with the same properties, updating enabled for ML.

        :param new_enabled_for_ml: The new value of the enabled-for-ML flag.
        :type new_enabled_for_ml: bool
        :return: A new spec.
        :rtype: :class:`~tamr_unify_client.project.attribute_configuration.resource.AttributeConfigurationSpec`
        """
        return self.from_data({**self._data, "enabledForMl": new_enabled_for_ml})

    def with_tokenizer(self, new_tokenizer):
        """Creates a new spec with the same properties, updating tokenizer.

        :param new_tokenizer: The new tokenizer.
        :type new_tokenizer: str
        :return: A new spec.
        :rtype: :class:`~tamr_unify_client.project.attribute_configuration.resource.AttributeConfigurationSpec`
        """
        return self.from_data({**self._data, "tokenizer": new_tokenizer})

    def with_numeric_field_resolution(self, new_numeric_field_resolution):
        """Creates a new spec with the same properties, updating numeric field resolution.

        :param new_numeric_field_resolution: The new numeric field resolution.
        :type new_numeric_field_resolution: list
        :return: A new spec.
        :rtype: :class:`~tamr_unify_client.project.attribute_configuration.resource.AttributeConfigurationSpec`
        """
        return self.from_data(
            {**self._data, "numericFieldResolution": new_numeric_field_resolution}
        )

    def put(self):
        """Updates the attribute configuration on the server.

        Sends this spec's data in a PUT request to ``api_path`` and builds an
        attribute configuration from the JSON body of the response.

        :return: The modified attribute configuration.
        :rtype: :class:`~tamr_unify_client.project.attribute_configuration.resource.AttributeConfiguration`
        """
        new_data = self.client.put(self.api_path, json=self._data).successful().json()
        return AttributeConfiguration.from_json(self.client, new_data, self.api_path)

    def __repr__(self):
        # Use ``.get`` rather than indexing: a spec built through ``from_data``
        # may hold partial data, and ``repr`` must not raise KeyError for it.
        field = self._data.get
        return (
            f"{self.__class__.__module__}."
            f"{self.__class__.__qualname__}("
            f"relative_id={field('relativeId')!r}, "
            f"id={field('id')!r}, "
            f"relative_attribute_id={field('relativeAttributeId')!r}, "
            f"attribute_role={field('attributeRole')!r}, "
            f"similarity_function={field('similarityFunction')!r}, "
            f"enabled_for_ml={field('enabledForMl')!r}, "
            f"tokenizer={field('tokenizer')!r}, "
            f"numeric_field_resolution={field('numericFieldResolution')!r}, "
            f"attribute_name={field('attributeName')!r})"
        )
65 changes: 57 additions & 8 deletions tests/unit/test_attribute_configuration.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from functools import partial
import json
from unittest import TestCase

from requests import HTTPError
Expand Down Expand Up @@ -57,22 +59,57 @@ def test_resource_from_json(self):

@responses.activate
def test_delete(self):
base = "http://localhost:9100/api/versioned/v1"
alias = "projects/1/attributeConfigurations"
attribute_id = "26"

url = f"{base}/{alias}/{attribute_id}"
url = f"{self._base}/{self._alias}/{self._attribute_id}"
responses.add(responses.GET, url, json=self._ac_json)
responses.add(responses.DELETE, url, status=204)
responses.add(responses.GET, url, status=404)

collection = AttributeConfigurationCollection(self.tamr, alias)
config = collection.by_resource_id(attribute_id)
collection = AttributeConfigurationCollection(self.tamr, self._alias)
config = collection.by_resource_id(self._attribute_id)

self.assertEqual(config._data, self._ac_json)

response = config.delete()
self.assertEqual(response.status_code, 204)
self.assertRaises(HTTPError, lambda: collection.by_resource_id(attribute_id))
self.assertRaises(
HTTPError, lambda: collection.by_resource_id(self._attribute_id)
)

@responses.activate
def test_update(self):
    # Chained spec updates should PUT the merged payload, return the updated
    # resource, and leave both the original resource and earlier specs intact.
    captured = {}

    def record_put(request, snoop):
        # Keep the request body so the payload sent to the server can be checked.
        snoop["payload"] = request.body
        return 200, {}, json.dumps(self._updated_ac_json)

    collection_url = f"{self._base}/{self._alias}"
    resource_url = f"{collection_url}/{self._attribute_id}"

    responses.add(responses.GET, resource_url, json=self._ac_json)
    responses.add_callback(
        responses.PUT, resource_url, partial(record_put, snoop=captured)
    )

    collection = AttributeConfigurationCollection(self.tamr, self._alias)
    original = collection.by_resource_id(self._attribute_id)

    # Build the final spec step by step, keeping the first step for later checks.
    role_spec = original.spec().with_attribute_role("SUM_ATTRIBUTE")
    ml_spec = role_spec.with_enabled_for_ml(False)
    similarity_spec = ml_spec.with_similarity_function("ABSOLUTE_DIFF")
    tokenizer_spec = similarity_spec.with_tokenizer("BIGRAM")
    updated = tokenizer_spec.put()

    self.assertEqual(updated._data, self._updated_ac_json)
    self.assertEqual(json.loads(captured["payload"]), self._updated_ac_json)
    self.assertEqual(original._data, self._ac_json)

    # The intermediate spec must not have been modified by the later steps.
    self.assertTrue(role_spec.to_dict()["enabledForMl"])

_base = "http://localhost:9100/api/versioned/v1"
_alias = "projects/1/attributeConfigurations"
_attribute_id = "26"

_ac_json = {
"id": "unify://unified-data/v1/projects/1/attributeConfigurations/26",
Expand All @@ -85,3 +122,15 @@ def test_delete(self):
"numericFieldResolution": [],
"attributeName": "surname",
}

_updated_ac_json = {
"id": "unify://unified-data/v1/projects/1/attributeConfigurations/26",
"relativeId": "projects/1/attributeConfigurations/26",
"relativeAttributeId": "datasets/8/attributes/surname",
"attributeRole": "SUM_ATTRIBUTE",
"similarityFunction": "ABSOLUTE_DIFF",
"enabledForMl": False,
"tokenizer": "BIGRAM",
"numericFieldResolution": [],
"attributeName": "surname",
}

0 comments on commit ae34775

Please sign in to comment.