Rename xml > source_record

MITLibraries · May 2, 2024 · cee6ffc · cee6ffc
1 parent 7f742e2
commit cee6ffc
Showing 1 changed file with 43 additions and 30 deletions.
diff --git a/transmogrifier/sources/xml/dspace_dim.py b/transmogrifier/sources/xml/dspace_dim.py
@@ -13,23 +13,24 @@
 class DspaceDim(XMLTransformer):
     """DSpace DIM transformer."""
 
-    def get_optional_fields(self, xml: Tag) -> dict | None:
+    def get_optional_fields(self, source_record: Tag) -> dict | None:
         """
         Retrieve optional TIMDEX fields from a DSpace DIM XML record.
 
         Overrides metaclass get_optional_fields() method.
 
         Args:
-            xml: A BeautifulSoup Tag representing a single DSpace DIM XML record.
+            source_record: A BeautifulSoup Tag representing a single DSpace DIM XML
+            record.
         """
         fields: dict = {}
 
-        source_record_id = self.get_source_record_id(xml)
+        source_record_id = self.get_source_record_id(source_record)
 
         # alternate_titles
         for alternate_title in [
             t
-            for t in xml.find_all("dim:field", element="title")
+            for t in source_record.find_all("dim:field", element="title")
             if "qualifier" in t.attrs and t.string
         ]:
             fields.setdefault("alternate_titles", []).append(
@@ -39,29 +40,31 @@ def get_optional_fields(self, xml: Tag) -> dict | None:
                 )
             )
         # If the record has more than one main title, add extras to alternate_titles
-        for index, title in enumerate(self.get_main_titles(xml)):
+        for index, title in enumerate(self.get_main_titles(source_record)):
             if index > 0:
                 fields.setdefault("alternate_titles", []).append(
                     timdex.AlternateTitle(value=title)
                 )
 
         # citation
-        citation = xml.find("dim:field", element="identifier", qualifier="citation")
+        citation = source_record.find(
+            "dim:field", element="identifier", qualifier="citation"
+        )
         fields["citation"] = citation.string if citation and citation.string else None
 
         # content_type
-        if content_types := self.get_content_types(xml):
+        if content_types := self.get_content_types(source_record):
             if self.valid_content_types(content_types):
                 fields["content_type"] = content_types
             else:
                 return None
 
         # contents
-        fields["contents"] = self.get_contents(xml) or None
+        fields["contents"] = self.get_contents(source_record) or None
 
         # contributors
         for creator in [
-            c for c in xml.find_all("dim:field", element="creator") if c.string
+            c for c in source_record.find_all("dim:field", element="creator") if c.string
         ]:
             fields.setdefault("contributors", []).append(
                 timdex.Contributor(
@@ -71,7 +74,9 @@ def get_optional_fields(self, xml: Tag) -> dict | None:
             )
 
         for contributor in [
-            c for c in xml.find_all("dim:field", element="contributor") if c.string
+            c
+            for c in source_record.find_all("dim:field", element="contributor")
+            if c.string
         ]:
             fields.setdefault("contributors", []).append(
                 timdex.Contributor(
@@ -81,12 +86,12 @@ def get_optional_fields(self, xml: Tag) -> dict | None:
             )
 
         # dates
-        fields["dates"] = self.get_dates(xml, source_record_id) or None
+        fields["dates"] = self.get_dates(source_record, source_record_id) or None
 
         # file_formats
         fields["file_formats"] = [
             f.string
-            for f in xml.find_all("dim:field", element="format")
+            for f in source_record.find_all("dim:field", element="format")
             if f.get("qualifier") == "mimetype" and f.string
         ] or None
 
@@ -96,7 +101,7 @@ def get_optional_fields(self, xml: Tag) -> dict | None:
         # funding_information
         for funding_reference in [
             f
-            for f in xml.find_all(
+            for f in source_record.find_all(
                 "dim:field", element="description", qualifier="sponsorship"
             )
             if f.string
@@ -108,7 +113,7 @@ def get_optional_fields(self, xml: Tag) -> dict | None:
             )
 
         # identifiers
-        identifiers = xml.find_all("dim:field", element="identifier")
+        identifiers = source_record.find_all("dim:field", element="identifier")
         for identifier in [
             i for i in identifiers if i.get("qualifier") != "citation" and i.string
         ]:
@@ -121,7 +126,9 @@ def get_optional_fields(self, xml: Tag) -> dict | None:
 
         # language
         fields["languages"] = [
-            la.string for la in xml.find_all("dim:field", element="language") if la.string
+            la.string
+            for la in source_record.find_all("dim:field", element="language")
+            if la.string
         ] or None
 
         # links, uses identifiers list retrieved for identifiers field
@@ -139,12 +146,14 @@ def get_optional_fields(self, xml: Tag) -> dict | None:
         # locations
         fields["locations"] = [
             timdex.Location(value=lo.string)
-            for lo in xml.find_all("dim:field", element="coverage", qualifier="spatial")
+            for lo in source_record.find_all(
+                "dim:field", element="coverage", qualifier="spatial"
+            )
             if lo.string
         ] or None
 
         # notes
-        descriptions = xml.find_all("dim:field", element="description")
+        descriptions = source_record.find_all("dim:field", element="description")
         for description in [
             d
             for d in descriptions
@@ -167,13 +176,13 @@ def get_optional_fields(self, xml: Tag) -> dict | None:
         # publishers
         fields["publishers"] = [
             timdex.Publisher(name=p.string)
-            for p in xml.find_all("dim:field", element="publisher")
+            for p in source_record.find_all("dim:field", element="publisher")
             if p.string
         ] or None
 
         # related_items
         for related_item in [
-            r for r in xml.find_all("dim:field", element="relation") if r.string
+            r for r in source_record.find_all("dim:field", element="relation") if r.string
         ]:
             if related_item.get("qualifier") == "uri":
                 ri = timdex.RelatedItem(
@@ -188,7 +197,7 @@ def get_optional_fields(self, xml: Tag) -> dict | None:
 
         # rights
         for rights in [
-            r for r in xml.find_all("dim:field", element="rights") if r.string
+            r for r in source_record.find_all("dim:field", element="rights") if r.string
         ]:
             if rights.get("qualifier") == "uri":
                 rg = timdex.Rights(uri=rights.string)
@@ -201,7 +210,7 @@ def get_optional_fields(self, xml: Tag) -> dict | None:
         # subjects
         subjects_dict: dict[str, list[str]] = {}
         for subject in [
-            s for s in xml.find_all("dim:field", element="subject") if s.string
+            s for s in source_record.find_all("dim:field", element="subject") if s.string
         ]:
             if not subject.get("qualifier"):
                 subjects_dict.setdefault("Subject scheme not provided", []).append(
@@ -298,47 +307,51 @@ def _parse_date_range(
         return None
 
     @classmethod
-    def get_content_types(cls, xml: Tag) -> list[str] | None:
+    def get_content_types(cls, source_record: Tag) -> list[str] | None:
         """
         Retrieve content types from a DSpace DIM XML record.
 
         May be overridden by source subclasses that retrieve content type values
         differently.
 
         Args:
-            xml: A BeautifulSoup Tag representing a single DSpace DIM XML record.
+            source_record: A BeautifulSoup Tag representing a single DSpace DIM XML
+            record.
         """
         return [
-            t.string for t in xml.find_all("dim:field", element="type", string=True)
+            t.string
+            for t in source_record.find_all("dim:field", element="type", string=True)
         ] or None
 
     @classmethod
-    def get_main_titles(cls, xml: Tag) -> list[str]:
+    def get_main_titles(cls, source_record: Tag) -> list[str]:
         """
         Retrieve main title(s) from a DSpace DIM XML record.
 
         Overrides metaclass get_main_titles() method.
 
         Args:
-            xml: A BeautifulSoup Tag representing a single DSpace DIM XML record.
+            source_record: A BeautifulSoup Tag representing a single DSpace DIM XML
+            record.
         """
         return [
             t.string
-            for t in xml.find_all("dim:field", element="title", string=True)
+            for t in source_record.find_all("dim:field", element="title", string=True)
             if "qualifier" not in t.attrs
         ]
 
     @classmethod
-    def get_source_record_id(cls, xml: Tag) -> str:
+    def get_source_record_id(cls, source_record: Tag) -> str:
         """
         Get the source record ID from a DSpace DIM XML record.
 
         Overrides metaclass get_source_record_id() method.
 
         Args:
-            xml: A BeautifulSoup Tag representing a single DSpace DIM XML record.
+            source_record: A BeautifulSoup Tag representing a single DSpace DIM XML
+            record.
         """
-        return xml.header.identifier.string.split(":")[2]
+        return source_record.header.identifier.string.split(":")[2]
 
     @classmethod
     def valid_content_types(cls, _content_type_list: list[str]) -> bool: