1414from dsc .config import Config
1515from dsc .db .models import ItemSubmissionDB , ItemSubmissionStatus
1616from dsc .exceptions import (
17+ BatchCreationFailedError ,
1718 InvalidSQSMessageError ,
1819 InvalidWorkflowNameError ,
1920 ReconcileFailedError ,
@@ -221,6 +222,60 @@ def item_metadata_iter(self) -> Iterator[dict[str, Any]]:
221222 MUST be overridden by workflow subclasses.
222223 """
223224
@final
def create_batch(self) -> None:
    """Create a batch of item submissions for processing.

    A "batch" refers to a collection of item submissions that are grouped
    together for coordinated processing, storage, and workflow execution.
    Each batch typically consists of multiple items, each with its own
    metadata and associated files, organized under a unique batch identifier.

    This method prepares the necessary assets in S3 (programmatically as
    needed) and records each item in the batch to DynamoDB. Nothing is
    written to DynamoDB when preparation reports any error.

    Raises:
        BatchCreationFailedError: If prepare_batch() reports one or more
            errors. The collected errors are passed to the exception so
            callers can see which items failed and why.
    """
    item_submissions, errors = self.prepare_batch()
    if errors:
        # Hand the collected errors to the exception rather than dropping
        # them; a bare raise leaves the caller with nothing to report.
        raise BatchCreationFailedError(errors)
    self._create_batch_in_db(item_submissions)
241+
@abstractmethod
def prepare_batch(self) -> tuple[list, ...]:
    """Stage the S3 assets for a batch of item submissions.

    Implementations carry out whatever steps are needed to get a batch
    of item submissions ready in S3. At a minimum, each implementation
    must verify, per item submission, that:

    - metadata exists for the item submission; if not, raise
      dsc.exceptions.ItemMetadataNotFoundError
    - at least one bitstream exists for the item submission; if not,
      raise dsc.exceptions.ItemBitstreamsNotFoundError

    MUST be overridden by workflow subclasses.

    Returns:
        A tuple whose first element is the item submissions (init
        params) as a list of dicts, and whose second element is the
        errors as a list of tuples, each holding the item identifier
        and the error message.
    """
263+
264+ @final
265+ def _create_batch_in_db (self , item_submissions : list [dict ]) -> None :
266+ """Write records for a batch of item submissions to DynamoDB.
267+
268+ This method loops through the item submissions (init params)
269+ represented as a list dicts. For each item submission, the
270+ method creates an instance of ItemSubmission and saves the
271+ record to DynamoDB.
272+ """
273+ for item_submission_init_params in item_submissions :
274+ item_submission = ItemSubmission .create (** item_submission_init_params )
275+ item_submission .last_run_date = self .run_date
276+ item_submission .status = ItemSubmissionStatus .BATCH_CREATED
277+ item_submission .save ()
278+
224279 @final
225280 def reconcile_items (self ) -> bool :
226281 """Reconcile item submissions for a batch.
@@ -241,20 +296,24 @@ def reconcile_items(self) -> bool:
241296 NOTE: This method is likely the first time a record will be inserted
242297 into DynamoDB for each item submission. If already present,
243298 its status will be updated.
299+
300+ TODO: Reconcile methods will be deprecated after end-to-end testing.
244301 """
245302 reconciled_items = {} # key=item_identifier, value=list of bitstream URIs
246303 bitstreams_without_metadata = [] # list of bitstream URIs
247304 metadata_without_bitstreams = [] # list of item identifiers
248305
249306 # loop through each item metadata
250307 for item_metadata in self .item_metadata_iter ():
251- item_submission = ItemSubmission .get_or_create (
308+ item_submission = ItemSubmission .get (
252309 batch_id = self .batch_id ,
253310 item_identifier = item_metadata ["item_identifier" ],
254- workflow_name = self .workflow_name ,
255- source_system_identifier = item_metadata .get ("source_system_identifier" ),
256311 )
257312
313+ # if no corresponding record in DynamoDB, skip
314+ if not item_submission :
315+ continue
316+
258317 # attach source metadata
259318 item_submission .source_metadata = item_metadata
260319
@@ -336,8 +395,7 @@ def reconcile_items(self) -> bool:
336395 )
337396 return True
338397
339- @abstractmethod
340- def reconcile_item (self , item_submission : ItemSubmission ) -> bool :
398+ def reconcile_item (self , _item_submission : ItemSubmission ) -> bool :
341399 """Reconcile bitstreams and metadata for an item.
342400
343401 Items in DSpace represent a "work" and combine metadata and files,
@@ -348,7 +406,10 @@ def reconcile_item(self, item_submission: ItemSubmission) -> bool:
348406
349407 If an item fails reconcile, this method should raise
350408 dsc.exceptions.ReconcileFailed*Error. Otherwise, return True.
409+
410+ TODO: Reconcile methods will be deprecated after end-to-end testing.
351411 """
412+ return False
352413
353414 def _report_reconcile_workflow_events (
354415 self ,
0 commit comments