diff --git a/tests/fixtures/ead/ead_record_all_fields.xml b/tests/fixtures/ead/ead_record_all_fields.xml index 7bbed6a..ef44bf5 100644 --- a/tests/fixtures/ead/ead_record_all_fields.xml +++ b/tests/fixtures/ead/ead_record_all_fields.xml @@ -2,7 +2,7 @@
- oai:mit//repositories/2/resources/1 + oai:mit/repositories/2/resources/1 2021-11-06T14:35:55Z
diff --git a/tests/fixtures/ead/ead_record_attribute_and_subfield_variations.xml b/tests/fixtures/ead/ead_record_attribute_and_subfield_variations.xml index d3f959b..6da95a9 100644 --- a/tests/fixtures/ead/ead_record_attribute_and_subfield_variations.xml +++ b/tests/fixtures/ead/ead_record_attribute_and_subfield_variations.xml @@ -2,7 +2,7 @@
- oai:mit//repositories/2/resources/6 + oai:mit/repositories/2/resources/6 2021-11-06T14:35:55Z
diff --git a/tests/fixtures/ead/ead_record_blank_optional_fields.xml b/tests/fixtures/ead/ead_record_blank_optional_fields.xml index 3512e28..b98fb4b 100644 --- a/tests/fixtures/ead/ead_record_blank_optional_fields.xml +++ b/tests/fixtures/ead/ead_record_blank_optional_fields.xml @@ -2,7 +2,7 @@
- oai:mit//repositories/2/resources/2 + oai:mit/repositories/2/resources/2 2021-11-06T14:35:55Z
diff --git a/tests/fixtures/ead/ead_record_missing_archdesc.xml b/tests/fixtures/ead/ead_record_missing_archdesc.xml index b69bf65..71045a5 100644 --- a/tests/fixtures/ead/ead_record_missing_archdesc.xml +++ b/tests/fixtures/ead/ead_record_missing_archdesc.xml @@ -2,7 +2,7 @@
- oai:mit//repositories/2/resources/4 + oai:mit/repositories/2/resources/4 2021-11-06T14:35:55Z
diff --git a/tests/fixtures/ead/ead_record_missing_archdesc_did.xml b/tests/fixtures/ead/ead_record_missing_archdesc_did.xml index 74cd87d..e9c67d8 100644 --- a/tests/fixtures/ead/ead_record_missing_archdesc_did.xml +++ b/tests/fixtures/ead/ead_record_missing_archdesc_did.xml @@ -2,7 +2,7 @@
- oai:mit//repositories/2/resources/3 + oai:mit/repositories/2/resources/3 2021-11-06T14:35:55Z
diff --git a/tests/fixtures/ead/ead_record_missing_optional_fields.xml b/tests/fixtures/ead/ead_record_missing_optional_fields.xml index e59afc5..dd897e6 100644 --- a/tests/fixtures/ead/ead_record_missing_optional_fields.xml +++ b/tests/fixtures/ead/ead_record_missing_optional_fields.xml @@ -2,7 +2,7 @@
- oai:mit//repositories/2/resources/5 + oai:mit/repositories/2/resources/5 2021-11-06T14:35:55Z
diff --git a/tests/sources/xml/test_ead.py b/tests/sources/xml/test_ead.py index 9d67036..5141183 100644 --- a/tests/sources/xml/test_ead.py +++ b/tests/sources/xml/test_ead.py @@ -880,7 +880,7 @@ def test_get_dates_success(): source_record = create_ead_source_record_stub( header_insert=( """ - oai:mit//repositories/2/resources/1 + oai:mit/repositories/2/resources/1 """ ), metadata_insert=( @@ -907,7 +907,7 @@ def test_get_dates_transforms_correctly_if_fields_blank(): source_record = create_ead_source_record_stub( header_insert=( """ - oai:mit//repositories/2/resources/1 + oai:mit/repositories/2/resources/1 """ ), metadata_insert=( @@ -924,7 +924,7 @@ def test_get_dates_transforms_correctly_if_fields_missing(): source_record = create_ead_source_record_stub( header_insert=( """ - oai:mit//repositories/2/resources/1 + oai:mit/repositories/2/resources/1 """ ), parent_element="did", @@ -936,7 +936,7 @@ def test_get_dates_transforms_correctly_if_date_invalid(): source_record = create_ead_source_record_stub( header_insert=( """ - oai:mit//repositories/2/resources/1 + oai:mit/repositories/2/resources/1 """ ), metadata_insert=( @@ -955,7 +955,7 @@ def test_get_dates_transforms_correctly_if_normal_attribute_missing(): source_record = create_ead_source_record_stub( header_insert=( """ - oai:mit//repositories/2/resources/1 + oai:mit/repositories/2/resources/1 """ ), metadata_insert=( diff --git a/transmogrifier/sources/transformer.py b/transmogrifier/sources/transformer.py index 356ffe9..2583f48 100644 --- a/transmogrifier/sources/transformer.py +++ b/transmogrifier/sources/transformer.py @@ -107,10 +107,10 @@ def __next__(self) -> DatasetRecord: self.skipped_record_count += 1 action = "skip" - except Exception as exception: # noqa: BLE001 + except Exception as exception: self.error_record_count += 1 message = f"Unhandled exception during record transformation: {exception}" - logger.warning(message) + logger.exception(message) action = "error" return DatasetRecord( diff --git a/transmogrifier/sources/xml/ead.py b/transmogrifier/sources/xml/ead.py index 971588a..d435a85 100644 --- a/transmogrifier/sources/xml/ead.py +++ b/transmogrifier/sources/xml/ead.py @@ -1,4 +1,5 @@ import logging +import re from collections.abc import Generator from bs4 import NavigableString, Tag # type: ignore[import-untyped] @@ -549,7 +550,14 @@ def get_source_record_id(cls, source_record: Tag) -> str: Args: source_record: A BeautifulSoup Tag representing a single EAD XML record. """ - return source_record.header.identifier.string.split("//")[1] + matches = re.match(r"oai:mit/+(.*)", source_record.header.identifier.string) + if not matches: + message = ( + "Could not parse TIMDEX identifier from OAI identifier: " + f"'{source_record.header.identifier.string}'" + ) + raise ValueError(message) + return matches.groups()[0] @classmethod def parse_mixed_value(