Skip to content

Commit

Permalink
Skip deleted records during transform
Browse files Browse the repository at this point in the history
Why these changes are being introduced:
As we are not yet handling deleted records, we need to skip them during
transform so they don't cause errors in the transform process. In a
future commit we will replace this with actual deleted record handling.

How this addresses that need:
* Adds a method in the Transformer class to check for deleted record
  status.
* Updates the transformer.transform method to return None (thus skipping
  the record) if the record status is deleted.
* Updates tests and fixtures to reflect changes.

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/TIMX-126
  • Loading branch information
hakbailey committed Nov 30, 2022
1 parent 9fa1492 commit 79d8bf6
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 3 deletions.
6 changes: 5 additions & 1 deletion tests/fixtures/oai_pmh_records.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,8 @@
<record xmlns="http://www.openarchives.org/OAI/2.0/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
</record>
</metadata>
<record xmlns="http://www.openarchives.org/OAI/2.0/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<header status="deleted"></header>
</record>
</metadata>
8 changes: 8 additions & 0 deletions tests/fixtures/record_deleted.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<record xmlns="http://www.openarchives.org/OAI/2.0/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<header status="deleted">
<identifier>123456</identifier>
<datestamp>2022-11-30T16:53:47Z</datestamp>
</header>
</record>
14 changes: 12 additions & 2 deletions tests/test_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ def test_transformer_initializes_with_expected_attributes(oai_pmh_records):
def test_transformer_iterates_through_all_records(oai_pmh_records):
    """Exhaust a Transformer and verify its output size and record counters."""
    transformer = Transformer("cool-repo", oai_pmh_records)
    # Materialize the iterator first so the counters reflect a full pass.
    transformed = list(transformer)
    assert len(transformed) == 2
    assert transformer.processed_record_count == 3
    assert transformer.skipped_record_count == 1
    assert transformer.transformed_record_count == 2


def test_transformer_iterates_successfully_if_get_optional_fields_returns_none(
Expand All @@ -28,11 +31,18 @@ def test_transformer_iterates_successfully_if_get_optional_fields_returns_none(
m.return_value = None
output_records = Transformer("cool-repo", oai_pmh_records)
assert len(list(output_records)) == 0
assert output_records.processed_record_count == 2
assert output_records.skipped_record_count == 2
assert output_records.processed_record_count == 3
assert output_records.skipped_record_count == 3
assert output_records.transformed_record_count == 0


def test_transformer_record_is_deleted_returns_true_if_deleted(caplog):
    """A record whose header status is deleted is skipped and the skip is logged."""
    deleted_records = parse_xml_records("tests/fixtures/record_deleted.xml")
    transformer = Datacite("cool-repo", deleted_records)
    # Iterating should yield nothing because the only record is skipped.
    transformed = list(transformer)
    assert transformed == []
    assert "Skipping record 123456 with header status deleted" in caplog.text


def test_transformer_get_required_fields_returns_expected_values(oai_pmh_records):
transformer = Transformer("cool-repo", oai_pmh_records)
assert transformer.get_required_fields(next(oai_pmh_records)) == {
Expand Down
21 changes: 21 additions & 0 deletions transmogrifier/sources/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,25 @@ def get_source_record_id(cls, xml: Tag) -> str:
"""
return ""

@classmethod
def record_is_deleted(cls, xml: Tag) -> bool:
    """
    Determine whether record has a status of deleted.

    This is a concrete default implementation, so it is intentionally not
    marked abstract: source subclasses may override it if needed, but are not
    required to.

    Args:
        xml: A BeautifulSoup Tag representing a single XML record.

    Returns:
        True if the record contains a <header> element whose status attribute
        is "deleted" (a debug message naming the record is logged in that
        case), otherwise False.
    """
    if xml.find("header", status="deleted"):
        logger.debug(
            f"Skipping record {cls.get_source_record_id(xml)} with header status "
            "deleted"
        )
        return True
    return False

@final
def get_required_fields(self, xml: Tag) -> dict:
"""
Expand Down Expand Up @@ -116,6 +135,8 @@ def transform(self, xml: Tag) -> Optional[TimdexRecord]:
Args:
xml: A BeautifulSoup Tag representing a single OAI-PMH XML record.
"""
if self.record_is_deleted(xml):
return None
optional_fields = self.get_optional_fields(xml)
if optional_fields is None:
return None
Expand Down

0 comments on commit 79d8bf6

Please sign in to comment.