From 895b4ca8b8eca60390593f7b7e5334f951b25401 Mon Sep 17 00:00:00 2001 From: jonavellecuerdo Date: Fri, 9 Feb 2024 11:20:25 -0500 Subject: [PATCH] Address comments * Create a fixture mocking a SourceRecord with validated fields * Revise ValidateGeoshapeWKT validator to handle different cases: * field method returns invalid string value (shapely cannot parse) * field method returns Nonetype value * field method returns value that is (not None AND not string) * Update test suite as needed --- harvester/records/validators.py | 74 ++++++++++--- tests/conftest.py | 16 +++ tests/test_records/test_validator.py | 150 ++++++++++++++------------- 3 files changed, 155 insertions(+), 85 deletions(-) diff --git a/harvester/records/validators.py b/harvester/records/validators.py index d702bac..f70e5ae 100644 --- a/harvester/records/validators.py +++ b/harvester/records/validators.py @@ -24,51 +24,97 @@ # Data Validators ################### class ValidateGeoshapeWKT: - """Decorator class for validating geoshape WKT values. + """Decorator class for validating geoshape string values. The validator should be applied to any field methods that retrieve geoshape - WKT values. The validator logs a warning if the WKT value cannot be parsed - using the shapely module. If the WKT value cannot be parsed, the validator - resets the value to None. + string values. If the field method retrieves a string value, the validator + will determine if shapely can parse the string as WKT. + + Warning messages will be logged for the following scenarios: + + 1. Invalid WKT: Field method retrieves string value but shapely was unable to + parse string as WKT. Validator returns None. + + 2. Invalid None: Field method retrieves NoneType value. Validator returns None. + + 3. Invalid Type: Field method retrieves value that is neither of NoneType or a string + value. Validator returns None. + + The validator will return the original value only if shapely was able to successfully + create a geometry object from the string value retrieved by the field method. """ invalid_wkt_warning_message: str = ( - "field: {field_name}, shapely was unable to parse WKT: '{value}'; " + "field: {field_name}, shapely was unable to parse string as WKT: '{value}'; " + "setting value to None" + ) + invalid_none_warning_message: str = ( + "field: {field_name}, value of type NoneType was provided; " + "returning original value of None" + ) + invalid_type_warning_message: str = ( + "field: {field_name}, value of type {type} was provided: {value}; " "setting value to None" ) def __init__(self, field_method: Callable): update_wrapper(self, field_method) self.field_method = field_method + self.field_name = field_method.__name__.removeprefix("_") def __call__(self, obj: object) -> str | None: - field_name = self.field_method.__name__.removeprefix("_") + """Validate string values retrieved by field method.""" value = self.field_method(obj) + if isinstance(value, str): + return self.validate(value) + + if value is None: + logger.warning( + FieldValueInvalidWarning( + self.invalid_none_warning_message.format( + field_name=self.field_name, value=value + ) + ) + ) + return None + + logger.warning( + FieldValueInvalidWarning( + self.invalid_type_warning_message.format( + field_name=self.field_name, type=type(value), value=value + ) + ) + ) + return None + + def __get__(self, obj: object, _: type) -> partial[str | None]: + """Required by decorator to access SourceRecord instance""" + return partial(self.__call__, obj) + + def validate(self, value: str) -> str | None: + """Validate geoshape WKT string value with shapely. + + The geoshape WKT string value is considered valid if shapely can + successfully create a geometry object. + """ try: self.create_geoshape(value) except Exception: # noqa: BLE001 logger.warning( FieldValueInvalidWarning( self.invalid_wkt_warning_message.format( - field_name=field_name, value=value + field_name=self.field_name, value=value ) ) ) return None return value - def __get__(self, obj: object, _: type) -> partial[str | None]: - """Required by decorator to access SourceRecord instance""" - return partial(self.__call__, obj) - @staticmethod def create_geoshape(wkt: str) -> shapely.Geometry: """Run shapely to determine whether geoshape WKT value is valid. - If the geoshape WKT value is valid, shapely can successfully create a - geometry object. - Note: shapely does not currently support WKT values for bounding box regions or envelopes. The method uses regex to retrieving the vertices for a geoshape WKT value with the format: "ENVELOPE()". The regex retrieves the vertices diff --git a/tests/conftest.py b/tests/conftest.py index 83273fd..d444818 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,6 +22,7 @@ from harvester.harvest.mit import MITHarvester from harvester.harvest.ogm import OGMHarvester, OGMRepository from harvester.records import FGDC, ISO19139, MITAardvark, Record, XMLSourceRecord +from harvester.records.validators import ValidateGeoshapeWKT @pytest.fixture(autouse=True) @@ -242,6 +243,21 @@ def _dct_references_s(self): ) +@pytest.fixture +def mocked_validated_fields_source_record(): + class TestValidatedSourceRecord: + mocked_value = "" + + @ValidateGeoshapeWKT + def _dcat_bbox(self): + return self.mocked_value + + def _locn_geometry(self): + return self._dcat_bbox() + + return TestValidatedSourceRecord + + @pytest.fixture def valid_mitaardvark_data_required_fields(): return { diff --git a/tests/test_records/test_validator.py b/tests/test_records/test_validator.py index c867dab..81df15f 100644 --- a/tests/test_records/test_validator.py +++ b/tests/test_records/test_validator.py @@ -1,98 +1,116 @@ -from harvester.records.validators import ValidateGeoshapeWKT +def test_validator_envelope_none_nonetype_skips_validation_logs_warning_returns_none( + caplog, mocked_validated_fields_source_record +): + caplog.set_level("DEBUG") + mocked_validated_fields_source_record.mocked_value = None + value = mocked_validated_fields_source_record()._dcat_bbox() # noqa: SLF001 + invalid_none_warning_message = ( + "field: dcat_bbox, value of type NoneType was provided; " + "returning original value of None" + ) + assert value is None + assert invalid_none_warning_message in caplog.text -def test_validator_envelope_returns_original_value_success( - caplog, +def test_validator_envelope_none_string_skips_validation_logs_warning_returns_none( + caplog, mocked_validated_fields_source_record ): caplog.set_level("DEBUG") + mocked_validated_fields_source_record.mocked_value = "None" + value = mocked_validated_fields_source_record()._dcat_bbox() # noqa: SLF001 + invalid_wkt_warning_message = ( + "field: dcat_bbox, shapely was unable to parse string as WKT: 'None'; " + "setting value to None" + ) + assert value is None + assert invalid_wkt_warning_message in caplog.text - class TestValidatedXMLSourceRecord: - @ValidateGeoshapeWKT - def _dcat_bbox(self): - return "ENVELOPE(71.0589, 74.0060, 42.3601, 40.7128)" - value = TestValidatedXMLSourceRecord()._dcat_bbox() # noqa: SLF001 - assert value == "ENVELOPE(71.0589, 74.0060, 42.3601, 40.7128)" +def test_validator_envelope_nonstring_skips_validation_logs_warning_returns_none( + caplog, mocked_validated_fields_source_record +): + caplog.set_level("DEBUG") + mocked_validated_fields_source_record.mocked_value = 999 + value = mocked_validated_fields_source_record()._dcat_bbox() # noqa: SLF001 + invalid_type_warning_message = ( + "field: dcat_bbox, value of type was provided: 999; " + "setting value to None" + ) + assert value is None + assert invalid_type_warning_message in caplog.text -def test_validator_envelope_missing_vertices_logs_warning_and_sets_value_to_none( - caplog, +def test_validator_envelope_returns_original_value_success( + caplog, mocked_validated_fields_source_record ): caplog.set_level("DEBUG") + mocked_validated_fields_source_record.mocked_value = ( + "ENVELOPE(71.0589, 74.0060, 42.3601, 40.7128)" + ) + value = mocked_validated_fields_source_record()._dcat_bbox() # noqa: SLF001 + assert value == "ENVELOPE(71.0589, 74.0060, 42.3601, 40.7128)" - class TestValidatedXMLSourceRecord: - @ValidateGeoshapeWKT - def _dcat_bbox(self): - return "ENVELOPE()" - value = TestValidatedXMLSourceRecord()._dcat_bbox() # noqa: SLF001 - invalid_value_warning_message = ( - "field: dcat_bbox, shapely was unable to parse WKT: 'ENVELOPE()'; " +def test_validator_envelope_missing_vertices_logs_warning_sets_value_to_none( + caplog, mocked_validated_fields_source_record +): + caplog.set_level("DEBUG") + mocked_validated_fields_source_record.mocked_value = "ENVELOPE()" + value = mocked_validated_fields_source_record()._dcat_bbox() # noqa: SLF001 + invalid_wkt_warning_message = ( + "field: dcat_bbox, shapely was unable to parse string as WKT: 'ENVELOPE()'; " "setting value to None" ) assert value is None - assert invalid_value_warning_message in caplog.text + assert invalid_wkt_warning_message in caplog.text -def test_validator_envelope_insufficient_vertices_logs_warning_and_sets_value_to_none( - caplog, +def test_validator_envelope_insufficient_vertices_logs_warning_sets_value_to_none( + caplog, mocked_validated_fields_source_record ): caplog.set_level("DEBUG") - - class TestValidatedXMLSourceRecord: - @ValidateGeoshapeWKT - def _dcat_bbox(self): - return "ENVELOPE(71.0589, 74.0060, 42.3601)" - - value = TestValidatedXMLSourceRecord()._dcat_bbox() # noqa: SLF001 - invalid_value_warning_message = ( - "field: dcat_bbox, shapely was unable to parse WKT: " + mocked_validated_fields_source_record.mocked_value = ( + "ENVELOPE(71.0589, 74.0060, 42.3601)" + ) + value = mocked_validated_fields_source_record()._dcat_bbox() # noqa: SLF001 + invalid_wkt_warning_message = ( + "field: dcat_bbox, shapely was unable to parse string as WKT: " "'ENVELOPE(71.0589, 74.0060, 42.3601)'; " "setting value to None" ) assert value is None - assert invalid_value_warning_message in caplog.text + assert invalid_wkt_warning_message in caplog.text def test_validator_polygon_returns_original_value_success( - caplog, + caplog, mocked_validated_fields_source_record ): caplog.set_level("DEBUG") polygon_wkt = ( "POLYGON ((74.0060 40.7128, 71.0589 42.3601, " "73.7562 42.6526, 74.0060 40.7128))" ) - - class TestValidatedXMLSourceRecord: - @ValidateGeoshapeWKT - def _locn_geometry(self): - return polygon_wkt - - value = TestValidatedXMLSourceRecord()._locn_geometry() # noqa: SLF001 + mocked_validated_fields_source_record.mocked_value = polygon_wkt + value = mocked_validated_fields_source_record()._locn_geometry() # noqa: SLF001 assert value == polygon_wkt -def test_validator_polygon_missing_vertices_logs_warning_and_sets_value_to_none( - caplog, +def test_validator_polygon_missing_vertices_logs_warning_sets_value_to_none( + caplog, mocked_validated_fields_source_record ): caplog.set_level("DEBUG") - - class TestValidatedXMLSourceRecord: - @ValidateGeoshapeWKT - def _locn_geometry(self): - return "POLYGON (())" - - value = TestValidatedXMLSourceRecord()._locn_geometry() # noqa: SLF001 - invalid_value_warning_message = ( - "field: locn_geometry, shapely was unable to parse WKT: 'POLYGON (())'; " + mocked_validated_fields_source_record.mocked_value = "POLYGON (())" + value = mocked_validated_fields_source_record()._locn_geometry() # noqa: SLF001 + invalid_wkt_warning_message = ( + "field: dcat_bbox, shapely was unable to parse string as WKT: 'POLYGON (())'; " "setting value to None" ) assert value is None - assert invalid_value_warning_message in caplog.text + assert invalid_wkt_warning_message in caplog.text def test_validator_multipolygon_returns_original_value_success( - caplog, + caplog, mocked_validated_fields_source_record ): caplog.set_level("DEBUG") multipolygon_wkt = ( @@ -101,31 +119,21 @@ def test_validator_multipolygon_returns_original_value_success( "41.7658 72.6734)), ((73.9776 40.7614, 73.9554 40.7827, 73.9631 40.7812, " "73.9776 40.7614)))" ) - - class TestValidatedXMLSourceRecord: - @ValidateGeoshapeWKT - def _locn_geometry(self): - return multipolygon_wkt - - value = TestValidatedXMLSourceRecord()._locn_geometry() # noqa: SLF001 + mocked_validated_fields_source_record.mocked_value = multipolygon_wkt + value = mocked_validated_fields_source_record()._locn_geometry() # noqa: SLF001 assert value == multipolygon_wkt -def test_validator_multipolygon_missing_vertices_logs_warning_and_sets_value_to_none( - caplog, +def test_validator_multipolygon_missing_vertices_logs_warning_sets_value_to_none( + caplog, mocked_validated_fields_source_record ): caplog.set_level("DEBUG") - - class TestValidatedXMLSourceRecord: - @ValidateGeoshapeWKT - def _locn_geometry(self): - return "MULTIPOLYGON (((), ()), (()))" - - value = TestValidatedXMLSourceRecord()._locn_geometry() # noqa: SLF001 - invalid_value_warning_message = ( - "field: locn_geometry, shapely was unable to parse WKT: " + mocked_validated_fields_source_record.mocked_value = "MULTIPOLYGON (((), ()), (()))" + value = mocked_validated_fields_source_record()._locn_geometry() # noqa: SLF001 + invalid_wkt_warning_message = ( + "field: dcat_bbox, shapely was unable to parse string as WKT: " "'MULTIPOLYGON (((), ()), (()))'; " "setting value to None" ) assert value is None - assert invalid_value_warning_message in caplog.text + assert invalid_wkt_warning_message in caplog.text