Skip to content

Commit

Permalink
Save record fields only required for EventBridge events (#314)
Browse files Browse the repository at this point in the history
Why these changes are being introduced:
* Saving the entire Record object to the 'event_records' dictionary in-memory
was causing the container to run out-of-memory. This update applies the
solution from #112.

How this addresses that need:
* Only save the fields that are required for the EventBridge details param.

Side effects of this change:
* None

Relevant ticket(s):
* https://mitlibraries.atlassian.net/browse/GDT-323
  • Loading branch information
jonavellecuerdo committed May 21, 2024
1 parent 85b778d commit e6b7e62
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 14 deletions.
33 changes: 21 additions & 12 deletions harvester/harvest/mit.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,23 +184,32 @@ def send_eventbridge_event(self, records: Iterator[Record]) -> Iterator[Record]:
path = path.removesuffix("/")

# queue EventBridge events
event_records: dict[str, Record] = {}
event_records: dict[str, dict] = {}
for record in records:
if not self.skip_eventbridge_events:
event_records[record.identifier] = record
record_dict = {
"record_identifier": record.identifier,
"source_record_is_restricted": record.source_record.is_restricted,
"source_record_is_deleted": record.source_record.is_deleted,
"source_metadata_filename": record.source_record.source_metadata_filename, # noqa: E501
"normalized_metadata_filename": record.source_record.normalized_metadata_filename, # noqa: E501
}
event_records[record.identifier] = record_dict
yield record

# after Records yielded, publish EventBridge events
for record in event_records.values():
message = f"Record {record.identifier}: sending EventBridge event"
for event_record in event_records.values():
message = (
f"Record {event_record['record_identifier']}: sending EventBridge event"
)
logger.debug(message)
try:
self._prepare_payload_and_send_event(bucket, path, record)
self._prepare_payload_and_send_event(bucket, path, event_record)
except Exception:
logger.exception("Error sending EventBridge event")

def _prepare_payload_and_send_event(
self, bucket: str, path: str, record: Record
self, bucket: str, path: str, record: dict
) -> str:
"""Prepare event detail and send event.
Expand All @@ -221,13 +230,13 @@ def _prepare_payload_and_send_event(
"""
detail = {
"bucket": bucket,
"identifier": record.identifier,
"restricted": json.dumps(record.source_record.is_restricted),
"deleted": json.dumps(record.source_record.is_deleted),
"identifier": record["record_identifier"],
"restricted": json.dumps(record["source_record_is_restricted"]),
"deleted": json.dumps(record["source_record_is_deleted"]),
"objects": [
{"Key": f"{path}/{record.source_record.source_metadata_filename}"},
{"Key": f"{path}/{record.source_record.normalized_metadata_filename}"},
{"Key": f"{path}/{record.identifier}.zip"},
{"Key": f"{path}/{record['source_metadata_filename']}"},
{"Key": f"{path}/{record['normalized_metadata_filename']}"},
{"Key": f"{path}/{record['record_identifier']}.zip"},
],
}
return EventBridgeClient.send_event(detail=detail)
Expand Down
11 changes: 9 additions & 2 deletions tests/test_harvest/test_mit_harvester.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ def test_mit_harvester_send_eventbridge_duplicate_record_sends_one_last_event(
),
)
)

with mock.patch.object(
MITHarvester,
"_prepare_payload_and_send_event",
Expand All @@ -289,7 +290,7 @@ def test_mit_harvester_send_eventbridge_duplicate_record_sends_one_last_event(
_output_records = list(harvester.send_eventbridge_event(iter(records)))

mock_method.assert_called_once()
assert mock_method.mock_calls[0].args[2].source_record.event == "created"
assert not mock_method.mock_calls[0].args[2]["source_record_is_deleted"]


def test_mit_harvester_send_eventbridge_multiples_records_send_multiple_events(
Expand Down Expand Up @@ -341,7 +342,13 @@ def test_mit_harvester_prepare_payload_and_send_event_success(records_for_mit_st
harvester._prepare_payload_and_send_event(
"the-bucket",
"/path/here",
record,
record={
"record_identifier": record.identifier,
"source_record_is_restricted": record.source_record.is_restricted,
"source_record_is_deleted": record.source_record.is_deleted,
"source_metadata_filename": record.source_record.source_metadata_filename,
"normalized_metadata_filename": record.source_record.normalized_metadata_filename, # noqa: E501
},
)
mocked_send_event.assert_called_with(
detail={
Expand Down

0 comments on commit e6b7e62

Please sign in to comment.