MITLibraries · ehanson8 · Dec 20, 2023 · Dec 19, 2023 · Dec 20, 2023 · Dec 20, 2023
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -48,10 +48,8 @@ def runner():
 
 @pytest.fixture
 def aardvark_record_all_fields():
-    return next(
-        JsonTransformer.parse_source_file(
-            "tests/fixtures/aardvark/aardvark_record_all_fields.jsonl"
-        )
+    return JsonTransformer.parse_source_file(  # iterator itself; tests call next()
+        "tests/fixtures/aardvark/aardvark_record_all_fields.jsonl"
     )
 
 

diff --git a/tests/fixtures/aardvark/aardvark_record_all_fields.jsonl b/tests/fixtures/aardvark/aardvark_record_all_fields.jsonl
@@ -1 +1 @@
-{"id": "123", "dcat_keyword_sm": ["Country"], "dcat_theme_sm": ["Political boundaries"], "dct_spatial_sm": ["Some city, Some country"], "dct_subject_sm": ["Geography", "Earth"], "gbl_resourceClass_sm": ["Dataset"], "gbl_resourceType_sm": ["Vector data"], "dct_title_s": "Test title 1"}
+{"id": "123", "dcat_bbox": "ENVELOPE(-111.1, -104.0, 45.0, 40.9)", "dcat_keyword_sm": ["Country"], "dcat_theme_sm": ["Political boundaries"], "dct_accessRights_s": "Access note", "dct_alternative_sm": ["Alternate title"], "dct_creator_sm": ["Smith, Jane", "Smith, John"], "dct_description_sm": ["A description"], "dct_format_s": "Shapefile", "dct_language_sm": ["eng"], "dct_license_sm": ["http://license.license", "http://another_license.another_license"], "dct_publisher_sm": ["ML InfoMap (Firm)"], "dct_rights_sm": ["Some person has the rights"], "dct_rightsHolder_sm": ["The person with the rights", "Another person with the rights"], "dct_spatial_sm": ["Some city, Some country"], "dct_subject_sm": ["Geography", "Earth"], "dct_title_s": "Test title 1", "gbl_displayNote_sm": ["Danger: This text will be displayed in a red box","Info: This text will be displayed in a blue box","Tip: This text will be displayed in a green box","Warning: This text will be displayed in a yellow box","This is text without a tag and it will be assigned default 'note' style"], "gbl_resourceClass_sm": ["Dataset"], "gbl_resourceType_sm": ["Vector data"], "locn_geometry": "POLYGON((-80 25, -65 18, -64 33, -80 25))", "schema_provider_s": "MIT"}
diff --git a/tests/sources/json/test_aardvark.py b/tests/sources/json/test_aardvark.py
@@ -12,7 +12,7 @@ def test_aardvark_get_required_fields_returns_expected_values(aardvark_records):
     }
 
 
-def test_jsontransformer_transform_returns_timdex_record(aardvark_records):
+def test_aardvark_transform_returns_timdex_record(aardvark_records):
     transformer = MITAardvark("cool-repo", aardvark_records)
     assert next(transformer) == timdex.TimdexRecord(
         source="A Cool Repository",
@@ -24,16 +24,92 @@ def test_jsontransformer_transform_returns_timdex_record(aardvark_records):
     )
 
 
+def test_aardvark_get_optional_fields_non_field_method_values_success(
+    aardvark_record_all_fields,  # conftest fixture built from the all-fields JSONL
+):
+    transformer = MITAardvark("cool-repo", aardvark_record_all_fields)
+    record = next(transformer)  # first record yielded by the transformer
+    assert record.format == "Shapefile"  # copied from dct_format_s
+    assert record.languages == ["eng"]  # presumably dct_language_sm; not in this diff
+    assert record.summary == ["A description"]  # copied from dct_description_sm
+
+
 def test_aardvark_get_main_titles_success(aardvark_record_all_fields):
-    assert MITAardvark.get_main_titles(aardvark_record_all_fields) == ["Test title 1"]
+    assert MITAardvark.get_main_titles(next(aardvark_record_all_fields)) == [
+        "Test title 1"  # matches the fixture's dct_title_s value
+    ]
 
 
 def test_aardvark_get_source_record_id_success(aardvark_record_all_fields):
-    assert MITAardvark.get_source_record_id(aardvark_record_all_fields) == "123"
+    assert MITAardvark.get_source_record_id(next(aardvark_record_all_fields)) == "123"
+
+
+def test_aardvark_get_alternate_titles_success(aardvark_record_all_fields):
+    assert MITAardvark.get_alternate_titles(next(aardvark_record_all_fields)) == [
+        timdex.AlternateTitle(value="Alternate title")  # dct_alternative_sm[0]
+    ]
+
+
+def test_aardvark_get_contributors_success(aardvark_record_all_fields):
+    assert MITAardvark.get_contributors(next(aardvark_record_all_fields)) == [
+        timdex.Contributor(
+            value="Smith, Jane",  # dct_creator_sm[0]
+            kind="Creator",  # kind is hard-coded in get_contributors
+        ),
+        timdex.Contributor(
+            value="Smith, John",  # dct_creator_sm[1]
+            kind="Creator",
+        ),
+    ]
+
+
+def test_aardvark_get_notes_success(aardvark_record_all_fields):
+    assert MITAardvark.get_notes(next(aardvark_record_all_fields)) == [
+        timdex.Note(  # one Note per gbl_displayNote_sm entry, in fixture order
+            value=["Danger: This text will be displayed in a red box"],
+            kind="Display note",  # same kind for every entry
+        ),
+        timdex.Note(
+            value=["Info: This text will be displayed in a blue box"],
+            kind="Display note",
+        ),
+        timdex.Note(
+            value=["Tip: This text will be displayed in a green box"],
+            kind="Display note",
+        ),
+        timdex.Note(
+            value=["Warning: This text will be displayed in a yellow box"],
+            kind="Display note",
+        ),
+        timdex.Note(
+            value=[  # untagged text is passed through unchanged
+                "This is text without a tag and it will be assigned default 'note' style"
+            ],
+            kind="Display note",
+        ),
+    ]
+
+
+def test_aardvark_get_publication_information_success(aardvark_record_all_fields):
+    assert MITAardvark.get_publication_information(
+        next(aardvark_record_all_fields)
+    ) == ["ML InfoMap (Firm)", "MIT"]  # publishers first, then schema_provider_s
+
+
+def test_aardvark_get_rights_success(aardvark_record_all_fields):
+    assert MITAardvark.get_rights(next(aardvark_record_all_fields)) == [
+        timdex.Rights(description="Access note", kind="Access"),  # dct_accessRights_s
+        timdex.Rights(uri="http://license.license"),  # one per dct_license_sm URI
+        timdex.Rights(uri="http://another_license.another_license"),
+        timdex.Rights(description="Some person has the rights"),  # dct_rights_sm
+        timdex.Rights(  # dct_rightsHolder_sm values joined with ". "
+            description="The person with the rights. Another person with the rights"
+        ),
+    ]
 
 
 def test_aardvark_get_subjects_success(aardvark_record_all_fields):
-    assert MITAardvark.get_subjects(aardvark_record_all_fields) == [
+    assert MITAardvark.get_subjects(next(aardvark_record_all_fields)) == [
         timdex.Subject(value=["Country"], kind="DCAT Keyword"),
         timdex.Subject(value=["Political boundaries"], kind="DCAT Theme"),
         timdex.Subject(value=["Geography"], kind="Dublin Core Subject"),

diff --git a/transmogrifier/sources/json/aardvark.py b/transmogrifier/sources/json/aardvark.py
@@ -51,30 +51,32 @@ def record_is_deleted(cls, source_record: dict) -> bool:
 
     def get_optional_fields(self, source_record: dict) -> dict | None:
         """
-        Retrieve optional TIMDEX fields from a Aardvar JSON record.
+        Retrieve optional TIMDEX fields from an Aardvark JSON record.
 
         Overrides metaclass get_optional_fields() method.
 
         Args:
-            xml: A BeautifulSoup Tag representing a single Datacite record in
-                oai_datacite XML.
+            source_record: A JSON object representing a source record.
         """
         fields: dict = {}
 
         # alternate_titles
+        fields["alternate_titles"] = self.get_alternate_titles(source_record) or None
 
         # content_type
         fields["content_type"] = ["Geospatial data"]
 
         # contributors
+        fields["contributors"] = self.get_contributors(source_record) or None
 
         # dates
 
-        # edition
+        # edition not used in MITAardvark
 
         # format
+        fields["format"] = source_record.get("dct_format_s")
 
-        # funding_information
+        # funding_information not used in MITAardvark
 
         # identifiers
 
@@ -86,19 +88,92 @@ def get_optional_fields(self, source_record: dict) -> dict | None:
         # locations
 
         # notes
+        fields["notes"] = self.get_notes(source_record) or None
 
         # publication_information
+        fields["publication_information"] = (
+            self.get_publication_information(source_record) or None
+        )
 
-        # related_items
+        # related_items not used in MITAardvark
 
         # rights
+        fields["rights"] = self.get_rights(source_record) or None
 
         # subjects
         fields["subjects"] = self.get_subjects(source_record) or None
 
         # summary field
+        fields["summary"] = source_record.get("dct_description_sm")
+
         return fields
 
+    @staticmethod
+    def get_alternate_titles(source_record: dict) -> list[timdex.AlternateTitle]:
+        """Get values from source record for TIMDEX alternate_titles field."""
+        return [  # one AlternateTitle per entry; [] when the key is absent
+            timdex.AlternateTitle(value=title_value)
+            for title_value in source_record.get("dct_alternative_sm", [])
+        ]
+
+    @staticmethod
+    def get_contributors(source_record: dict) -> list[timdex.Contributor]:
+        """Get values from source record for TIMDEX contributors field."""
+        return [  # every dct_creator_sm entry gets kind "Creator"; [] when absent
+            timdex.Contributor(value=contributor_value, kind="Creator")
+            for contributor_value in source_record.get("dct_creator_sm", [])
+        ]
+
+    @staticmethod
+    def get_notes(source_record: dict) -> list[timdex.Note]:
+        """Get values from source record for TIMDEX notes field."""
+        return [  # one Note per entry; each value is wrapped in a one-item list
+            timdex.Note(value=[note_value], kind="Display note")
+            for note_value in source_record.get("gbl_displayNote_sm", [])
+        ]
+
+    @staticmethod
+    def get_publication_information(source_record: dict) -> list[str]:
+        """Get values from source record for TIMDEX publication_information field."""
+        publication_information = []  # stays empty when neither key is present
+
+        if "dct_publisher_sm" in source_record:  # extended: each item is added
+            publication_information.extend(source_record["dct_publisher_sm"])
+
+        if "schema_provider_s" in source_record:  # appended as one item, after publishers
+            publication_information.append(source_record["schema_provider_s"])
+
+        return publication_information
+
+    @staticmethod
+    def get_rights(source_record: dict) -> list[timdex.Rights]:
+        """Get values from source record for TIMDEX rights field."""
+        rights = []  # order: access rights, license URIs, rights, rights holder
+
+        if "dct_accessRights_s" in source_record:  # value is used as-is as description
+            rights.append(
+                timdex.Rights(
+                    description=source_record["dct_accessRights_s"], kind="Access"
+                )
+            )
+
+        rights.extend(  # one uri-only Rights per dct_license_sm entry; none if absent
+            [
+                timdex.Rights(uri=rights_uri_value)
+                for rights_uri_value in source_record.get("dct_license_sm", [])
+            ]
+        )
+
+        for aardvark_rights_field in ["dct_rights_sm", "dct_rightsHolder_sm"]:
+            if aardvark_rights_field in source_record:  # absent fields add nothing
+                rights.append(  # a single Rights per field, not one per value
+                    timdex.Rights(
+                        description=". ".join(source_record[aardvark_rights_field])
+                    )
+                )
+
+        return rights
+
     @staticmethod
     def get_subjects(source_record: dict) -> list[timdex.Subject]:
         """Get values from source record for TIMDEX subjects field.
@@ -115,18 +190,21 @@ def get_subjects(source_record: dict) -> list[timdex.Subject]:
             source_record: A JSON object representing a source record.
         """
         subjects = []
+
         aardvark_subject_fields = {
             "dcat_keyword_sm": "DCAT Keyword",
             "dcat_theme_sm": "DCAT Theme",
             "dct_subject_sm": "Dublin Core Subject",
             "gbl_resourceClass_sm": "Subject scheme not provided",
             "gbl_resourceType_sm": "Subject scheme not provided",
         }
+
         for aardvark_subject_field, kind_value in {
             key: value
             for key, value in aardvark_subject_fields.items()
             if key in source_record
         }.items():
             for subject in source_record[aardvark_subject_field]:
                 subjects.append(timdex.Subject(value=[subject], kind=kind_value))
+
         return subjects