Skip to content

Commit

Permalink
Updates based on discussion in PR #286
Browse files: browse the repository at this point in the history
* Refactor the incremental_harvest_get_source_records method to yield a Record object carrying the exception (and the stage at which it occurred) when a zip file cannot be read, rather than skipping that record entirely
* Update the corresponding unit test
  • Loading branch information
ehanson8 committed Apr 22, 2024
1 parent f63f22e commit db698a5
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 10 deletions.
17 changes: 10 additions & 7 deletions harvester/harvest/mit.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,16 @@ def incremental_harvest_get_source_records(self) -> Iterator[Record]:
event=zip_file_event_message.event,
sqs_message=zip_file_event_message,
)
except OSError:
logger.exception("File not found")
continue
yield Record(
identifier=identifier,
source_record=source_record,
)
yield Record(
identifier=identifier,
source_record=source_record,
)
except OSError as exc:
yield Record(
identifier=identifier,
exception_stage="incremental_harvest_get_source_records",
exception=exc,
)

def harvester_specific_steps(self, records: Iterator[Record]) -> Iterator[Record]:
"""Harvest steps specific to MITHarvester
Expand Down
14 changes: 11 additions & 3 deletions tests/test_harvest/test_mit_harvester.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,19 @@ def test_mit_harvester_incremental_continues_after_missing_zip_file(
sqs_topic_name=mocked_sqs_topic_name,
)
records = harvester.incremental_harvest_get_source_records()
assert len(list(records)) == 1
failed_record, success_record = records
assert failed_record.identifier == "DEF456"
assert failed_record.exception_stage == "incremental_harvest_get_source_records"
assert isinstance(failed_record.exception, OSError)
assert (
"OSError: unable to access bucket: 'mocked_cdn_restricted' "
"key: 'cdn/geo/restricted/DEF456.zip'" in caplog.text
str(failed_record.exception)
== "unable to access bucket: 'mocked_cdn_restricted' key: "
"'cdn/geo/restricted/DEF456.zip' version: None error: An error occurred ("
"NoSuchKey) when calling the GetObject operation: The specified key does not "
"exist."
)
assert success_record.identifier == "SDE_DATA_AE_A8GNS_2003"
assert not success_record.exception


def test_mit_harvester_source_record_has_expected_values(caplog):
Expand Down

0 comments on commit db698a5

Please sign in to comment.