Review notes (text before the first "diff --git" line is ignored by git-apply/patch):
- test_aardvark_get_locations_success: the two log assertions were bare string
  literals (always truthy) placed before the call; they now run after the call
  and check caplog.text.
- MITAardvark.get_locations: dropped the try/except ValueError wrapper, since
  nothing left in the block can raise ValueError; the debug message text is
  unchanged but is now passed with lazy %s formatting.
- Indentation and blank context lines were reconstructed from a
  whitespace-collapsed copy of this patch; verify against the tree or apply
  with whitespace-insensitive matching (git apply --ignore-whitespace).
- The blob ids on the "index" lines are those of the original revision and no
  longer describe the two edited files.

diff --git a/tests/fixtures/aardvark_records.jsonl b/tests/fixtures/aardvark_records.jsonl
index 4946524..4c89e60 100644
--- a/tests/fixtures/aardvark_records.jsonl
+++ b/tests/fixtures/aardvark_records.jsonl
@@ -1,2 +1,2 @@
-{"dct_accessRights_s": "Access rights", "dct_references_s": "", "dct_title_s": "Test title 1", "gbl_mdModified_dt": "", "gbl_mdVersion_s": "", "gbl_resourceClass_sm": "", "id": "mit:123", "locn_geometry": ""}
-{"dct_accessRights_s": "Access rights", "dct_references_s": "", "dct_title_s": "Test title 2", "gbl_mdModified_dt": "", "gbl_mdVersion_s": "", "gbl_resourceClass_sm": "", "id": "ogm:456", "locn_geometry": ""}
\ No newline at end of file
+{"dct_accessRights_s": "Access rights", "dct_references_s": "", "dct_title_s": "Test title 1", "gbl_mdModified_dt": "", "gbl_mdVersion_s": "", "gbl_resourceClass_sm": "", "id": "mit:123", "locn_geometry": "ENVELOPE(-111.1, -104.0, 45.0, 40.9)"}
+{"dct_accessRights_s": "Access rights", "dct_references_s": "", "dct_title_s": "Test title 2", "gbl_mdModified_dt": "", "gbl_mdVersion_s": "", "gbl_resourceClass_sm": "", "id": "ogm:456", "locn_geometry": "ENVELOPE(-111.1, -104.0, 45.0, 40.9)"}
\ No newline at end of file
diff --git a/tests/sources/json/test_aardvark.py b/tests/sources/json/test_aardvark.py
index 5779b5e..a688530 100644
--- a/tests/sources/json/test_aardvark.py
+++ b/tests/sources/json/test_aardvark.py
@@ -75,6 +75,7 @@ def test_aardvark_get_dates_success(aardvark_record_all_fields):
         timdex.Date(kind="Coverage", value="1945"),
         timdex.Date(kind="Coverage", value="1946"),
         timdex.Date(
+            kind="Coverage",
             range=timdex.Date_Range(gte="1943", lte="1946"),
         ),
     ]
@@ -97,7 +98,7 @@ def test_parse_solr_date_range_invalid_date_range_string_raises_error():

 def test_aardvark_get_identifiers_success(aardvark_record_all_fields):
     assert MITAardvark.get_identifiers(next(aardvark_record_all_fields)) == [
-        timdex.Identifier(value="abc123")
+        timdex.Identifier(value="abc123", kind="Not specified")
     ]


@@ -129,11 +130,13 @@ def test_aardvark_get_links_logs_warning_for_invalid_json(caplog):
     )


-def test_aardvark_get_locations_success(aardvark_record_all_fields):
-    assert MITAardvark.get_locations(next(aardvark_record_all_fields), "123") == [
-        timdex.Location(kind="Bounding Box", geodata=[-111.1, -104.0, 45.0, 40.9]),
-        timdex.Location(kind="Geometry", geodata=[-111.1, -104.0, 45.0, 40.9]),
-    ]
+def test_aardvark_get_locations_success(caplog, aardvark_record_all_fields):
+    caplog.set_level("DEBUG")
+    assert MITAardvark.get_locations(next(aardvark_record_all_fields), "123") == []
+    assert "Geometry field 'dcat_bbox' found, but currently not mapped." in caplog.text
+    assert (
+        "Geometry field 'locn_geometry' found, but currently not mapped." in caplog.text
+    )


 def test_aardvark_get_notes_success(aardvark_record_all_fields):
diff --git a/transmogrifier/sources/json/aardvark.py b/transmogrifier/sources/json/aardvark.py
index e6b8416..5e5f971 100644
--- a/transmogrifier/sources/json/aardvark.py
+++ b/transmogrifier/sources/json/aardvark.py
@@ -3,7 +3,6 @@
 import re

 import transmogrifier.models as timdex
-from transmogrifier.helpers import parse_geodata_string
 from transmogrifier.sources.transformer import JSON, JSONTransformer

 logger = logging.getLogger(__name__)
@@ -228,9 +227,10 @@ def _range_dates(
             )
             range_dates.append(
                 timdex.Date(
+                    kind="Coverage",
                     range=timdex.Date_Range(
                         gte=date_range_values[0], lte=date_range_values[1]
-                    )
+                    ),
                 )
             )
         return range_dates
@@ -262,7 +262,7 @@ def parse_solr_date_range_string(
     def get_identifiers(source_record: dict) -> list[timdex.Identifier]:
         """Get values from source record for TIMDEX identifiers field."""
         return [
-            timdex.Identifier(value=identifier_value)
+            timdex.Identifier(value=identifier_value, kind="Not specified")
             for identifier_value in source_record.get("dct_identifier_sm", [])
         ]

@@ -292,8 +292,13 @@ def get_links(source_record: dict, source_record_id: str) -> list[timdex.Link]:
     def get_locations(
         source_record: dict, source_record_id: str
     ) -> list[timdex.Location]:
-        """Get values from source record for TIMDEX locations field."""
-        locations = []
+        """Get values from source record for TIMDEX locations field.
+
+        WIP: Currently in the process of determining our approach for storing geographic
+        geometry data in the TIMDEX record and how this dovetails with the OpenSearch
+        mapping. At this time, this method returns an empty list of Locations.
+        """
+        locations: list[timdex.Location] = []

         aardvark_location_fields = {
             "dcat_bbox": "Bounding Box",
@@ -303,15 +308,7 @@ def get_locations(
             if aardvark_location_field not in source_record:
                 continue
-            try:
-                if geodata_points := parse_geodata_string(
-                    source_record[aardvark_location_field], source_record_id
-                ):
-                    locations.append(
-                        timdex.Location(
-                            geodata=geodata_points,
-                            kind=kind_value,
-                        )
-                    )
-            except ValueError as exception:
-                logger.warning(exception)
+            logger.debug(
+                "Geometry field '%s' found, but currently not mapped.",
+                aardvark_location_field,
+            )
         return locations