Skip to content

Commit

Permalink
Address comments
Browse files Browse the repository at this point in the history
* Create a fixture mocking a SourceRecord with validated fields
* Revise ValidateGeoshapeWKT validator to handle different cases:
   * field method returns invalid string value (shapely cannot parse)
   * field method returns Nonetype value
   * field method returns value that is (not None AND not string)
* Update test suite as needed
  • Loading branch information
jonavellecuerdo committed Feb 9, 2024
1 parent 7b03b8d commit 895b4ca
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 85 deletions.
74 changes: 60 additions & 14 deletions harvester/records/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,51 +24,97 @@
# Data Validators
###################
class ValidateGeoshapeWKT:
"""Decorator class for validating geoshape WKT values.
"""Decorator class for validating geoshape string values.
The validator should be applied to any field methods that retrieve geoshape
WKT values. The validator logs a warning if the WKT value cannot be parsed
using the shapely module. If the WKT value cannot be parsed, the validator
resets the value to None.
string values. If the field method retrieves a string value, the validator
will determine if shapely can parse the string as WKT.
Warning messages will be logged for the following scenarios:
1. Invalid WKT: Field method retrieves string value but shapely was unable to
parse string as WKT. Validator returns None.
2. Invalid None: Field method retrieves NoneType value. Validator returns None.
3. Invalid Type: Field method retrieves value that is neither of NoneType or a string
value. Validator returns None.
The validator will return the original value only if shapely was able to successfully
create a geometry object from the string value retrieved by the field method.
"""

invalid_wkt_warning_message: str = (
"field: {field_name}, shapely was unable to parse WKT: '{value}'; "
"field: {field_name}, shapely was unable to parse string as WKT: '{value}'; "
"setting value to None"
)
invalid_none_warning_message: str = (
"field: {field_name}, value of type NoneType was provided; "
"returning original value of None"
)
invalid_type_warning_message: str = (
"field: {field_name}, value of type {type} was provided: {value}; "
"setting value to None"
)

def __init__(self, field_method: Callable):
update_wrapper(self, field_method)
self.field_method = field_method
self.field_name = field_method.__name__.removeprefix("_")

def __call__(self, obj: object) -> str | None:
field_name = self.field_method.__name__.removeprefix("_")
"""Validate string values retrieved by field method."""
value = self.field_method(obj)

if isinstance(value, str):
return self.validate(value)

if value is None:
logger.warning(
FieldValueInvalidWarning(
self.invalid_none_warning_message.format(
field_name=self.field_name, value=value
)
)
)
return None

logger.warning(
FieldValueInvalidWarning(
self.invalid_type_warning_message.format(
field_name=self.field_name, type=type(value), value=value
)
)
)
return None

def __get__(self, obj: object, _: type) -> partial[str | None]:
"""Required by decorator to access SourceRecord instance"""
return partial(self.__call__, obj)

def validate(self, value: str) -> str | None:
"""Validate geoshape WKT string value with shapely.
The geoshape WKT string value is considered valid if shapely can
successfully create a geometry object.
"""
try:
self.create_geoshape(value)
except Exception: # noqa: BLE001
logger.warning(
FieldValueInvalidWarning(
self.invalid_wkt_warning_message.format(
field_name=field_name, value=value
field_name=self.field_name, value=value
)
)
)
return None
return value

def __get__(self, obj: object, _: type) -> partial[str | None]:
"""Required by decorator to access SourceRecord instance"""
return partial(self.__call__, obj)

@staticmethod
def create_geoshape(wkt: str) -> shapely.Geometry:
"""Run shapely to determine whether geoshape WKT value is valid.
If the geoshape WKT value is valid, shapely can successfully create a
geometry object.
Note: shapely does not currently support WKT values for bounding box regions or
envelopes. The method uses regex to retrieving the vertices for a geoshape WKT
value with the format: "ENVELOPE(<vertices>)". The regex retrieves the vertices
Expand Down
16 changes: 16 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from harvester.harvest.mit import MITHarvester
from harvester.harvest.ogm import OGMHarvester, OGMRepository
from harvester.records import FGDC, ISO19139, MITAardvark, Record, XMLSourceRecord
from harvester.records.validators import ValidateGeoshapeWKT


@pytest.fixture(autouse=True)
Expand Down Expand Up @@ -242,6 +243,21 @@ def _dct_references_s(self):
)


@pytest.fixture
def mocked_validated_fields_source_record():
class TestValidatedSourceRecord:
mocked_value = ""

@ValidateGeoshapeWKT
def _dcat_bbox(self):
return self.mocked_value

def _locn_geometry(self):
return self._dcat_bbox()

return TestValidatedSourceRecord


@pytest.fixture
def valid_mitaardvark_data_required_fields():
return {
Expand Down
150 changes: 79 additions & 71 deletions tests/test_records/test_validator.py
Original file line number Diff line number Diff line change
@@ -1,98 +1,116 @@
from harvester.records.validators import ValidateGeoshapeWKT
def test_validator_envelope_none_nonetype_skips_validation_logs_warning_returns_none(
caplog, mocked_validated_fields_source_record
):
caplog.set_level("DEBUG")
mocked_validated_fields_source_record.mocked_value = None
value = mocked_validated_fields_source_record()._dcat_bbox() # noqa: SLF001
invalid_none_warning_message = (
"field: dcat_bbox, value of type NoneType was provided; "
"returning original value of None"
)
assert value is None
assert invalid_none_warning_message in caplog.text


def test_validator_envelope_returns_original_value_success(
caplog,
def test_validator_envelope_none_string_skips_validation_logs_warning_returns_none(
caplog, mocked_validated_fields_source_record
):
caplog.set_level("DEBUG")
mocked_validated_fields_source_record.mocked_value = "None"
value = mocked_validated_fields_source_record()._dcat_bbox() # noqa: SLF001
invalid_wkt_warning_message = (
"field: dcat_bbox, shapely was unable to parse string as WKT: 'None'; "
"setting value to None"
)
assert value is None
assert invalid_wkt_warning_message in caplog.text

class TestValidatedXMLSourceRecord:
@ValidateGeoshapeWKT
def _dcat_bbox(self):
return "ENVELOPE(71.0589, 74.0060, 42.3601, 40.7128)"

value = TestValidatedXMLSourceRecord()._dcat_bbox() # noqa: SLF001
assert value == "ENVELOPE(71.0589, 74.0060, 42.3601, 40.7128)"
def test_validator_envelope_nonstring_skips_validation_logs_warning_returns_none(
caplog, mocked_validated_fields_source_record
):
caplog.set_level("DEBUG")
mocked_validated_fields_source_record.mocked_value = 999
value = mocked_validated_fields_source_record()._dcat_bbox() # noqa: SLF001
invalid_type_warning_message = (
"field: dcat_bbox, value of type <class 'int'> was provided: 999; "
"setting value to None"
)
assert value is None
assert invalid_type_warning_message in caplog.text


def test_validator_envelope_missing_vertices_logs_warning_and_sets_value_to_none(
caplog,
def test_validator_envelope_returns_original_value_success(
caplog, mocked_validated_fields_source_record
):
caplog.set_level("DEBUG")
mocked_validated_fields_source_record.mocked_value = (
"ENVELOPE(71.0589, 74.0060, 42.3601, 40.7128)"
)
value = mocked_validated_fields_source_record()._dcat_bbox() # noqa: SLF001
assert value == "ENVELOPE(71.0589, 74.0060, 42.3601, 40.7128)"

class TestValidatedXMLSourceRecord:
@ValidateGeoshapeWKT
def _dcat_bbox(self):
return "ENVELOPE()"

value = TestValidatedXMLSourceRecord()._dcat_bbox() # noqa: SLF001
invalid_value_warning_message = (
"field: dcat_bbox, shapely was unable to parse WKT: 'ENVELOPE()'; "
def test_validator_envelope_missing_vertices_logs_warning_sets_value_to_none(
caplog, mocked_validated_fields_source_record
):
caplog.set_level("DEBUG")
mocked_validated_fields_source_record.mocked_value = "ENVELOPE()"
value = mocked_validated_fields_source_record()._dcat_bbox() # noqa: SLF001
invalid_wkt_warning_message = (
"field: dcat_bbox, shapely was unable to parse string as WKT: 'ENVELOPE()'; "
"setting value to None"
)
assert value is None
assert invalid_value_warning_message in caplog.text
assert invalid_wkt_warning_message in caplog.text


def test_validator_envelope_insufficient_vertices_logs_warning_and_sets_value_to_none(
caplog,
def test_validator_envelope_insufficient_vertices_logs_warning_sets_value_to_none(
caplog, mocked_validated_fields_source_record
):
caplog.set_level("DEBUG")

class TestValidatedXMLSourceRecord:
@ValidateGeoshapeWKT
def _dcat_bbox(self):
return "ENVELOPE(71.0589, 74.0060, 42.3601)"

value = TestValidatedXMLSourceRecord()._dcat_bbox() # noqa: SLF001
invalid_value_warning_message = (
"field: dcat_bbox, shapely was unable to parse WKT: "
mocked_validated_fields_source_record.mocked_value = (
"ENVELOPE(71.0589, 74.0060, 42.3601)"
)
value = mocked_validated_fields_source_record()._dcat_bbox() # noqa: SLF001
invalid_wkt_warning_message = (
"field: dcat_bbox, shapely was unable to parse string as WKT: "
"'ENVELOPE(71.0589, 74.0060, 42.3601)'; "
"setting value to None"
)
assert value is None
assert invalid_value_warning_message in caplog.text
assert invalid_wkt_warning_message in caplog.text


def test_validator_polygon_returns_original_value_success(
caplog,
caplog, mocked_validated_fields_source_record
):
caplog.set_level("DEBUG")
polygon_wkt = (
"POLYGON ((74.0060 40.7128, 71.0589 42.3601, "
"73.7562 42.6526, 74.0060 40.7128))"
)

class TestValidatedXMLSourceRecord:
@ValidateGeoshapeWKT
def _locn_geometry(self):
return polygon_wkt

value = TestValidatedXMLSourceRecord()._locn_geometry() # noqa: SLF001
mocked_validated_fields_source_record.mocked_value = polygon_wkt
value = mocked_validated_fields_source_record()._locn_geometry() # noqa: SLF001
assert value == polygon_wkt


def test_validator_polygon_missing_vertices_logs_warning_and_sets_value_to_none(
caplog,
def test_validator_polygon_missing_vertices_logs_warning_sets_value_to_none(
caplog, mocked_validated_fields_source_record
):
caplog.set_level("DEBUG")

class TestValidatedXMLSourceRecord:
@ValidateGeoshapeWKT
def _locn_geometry(self):
return "POLYGON (())"

value = TestValidatedXMLSourceRecord()._locn_geometry() # noqa: SLF001
invalid_value_warning_message = (
"field: locn_geometry, shapely was unable to parse WKT: 'POLYGON (())'; "
mocked_validated_fields_source_record.mocked_value = "POLYGON (())"
value = mocked_validated_fields_source_record()._locn_geometry() # noqa: SLF001
invalid_wkt_warning_message = (
"field: dcat_bbox, shapely was unable to parse string as WKT: 'POLYGON (())'; "
"setting value to None"
)
assert value is None
assert invalid_value_warning_message in caplog.text
assert invalid_wkt_warning_message in caplog.text


def test_validator_multipolygon_returns_original_value_success(
caplog,
caplog, mocked_validated_fields_source_record
):
caplog.set_level("DEBUG")
multipolygon_wkt = (
Expand All @@ -101,31 +119,21 @@ def test_validator_multipolygon_returns_original_value_success(
"41.7658 72.6734)), ((73.9776 40.7614, 73.9554 40.7827, 73.9631 40.7812, "
"73.9776 40.7614)))"
)

class TestValidatedXMLSourceRecord:
@ValidateGeoshapeWKT
def _locn_geometry(self):
return multipolygon_wkt

value = TestValidatedXMLSourceRecord()._locn_geometry() # noqa: SLF001
mocked_validated_fields_source_record.mocked_value = multipolygon_wkt
value = mocked_validated_fields_source_record()._locn_geometry() # noqa: SLF001
assert value == multipolygon_wkt


def test_validator_multipolygon_missing_vertices_logs_warning_and_sets_value_to_none(
caplog,
def test_validator_multipolygon_missing_vertices_logs_warning_sets_value_to_none(
caplog, mocked_validated_fields_source_record
):
caplog.set_level("DEBUG")

class TestValidatedXMLSourceRecord:
@ValidateGeoshapeWKT
def _locn_geometry(self):
return "MULTIPOLYGON (((), ()), (()))"

value = TestValidatedXMLSourceRecord()._locn_geometry() # noqa: SLF001
invalid_value_warning_message = (
"field: locn_geometry, shapely was unable to parse WKT: "
mocked_validated_fields_source_record.mocked_value = "MULTIPOLYGON (((), ()), (()))"
value = mocked_validated_fields_source_record()._locn_geometry() # noqa: SLF001
invalid_wkt_warning_message = (
"field: dcat_bbox, shapely was unable to parse string as WKT: "
"'MULTIPOLYGON (((), ()), (()))'; "
"setting value to None"
)
assert value is None
assert invalid_value_warning_message in caplog.text
assert invalid_wkt_warning_message in caplog.text

0 comments on commit 895b4ca

Please sign in to comment.