diff --git a/app/openbel/api/routes/datasets.rb b/app/openbel/api/routes/datasets.rb index ef06563..f890db5 100644 --- a/app/openbel/api/routes/datasets.rb +++ b/app/openbel/api/routes/datasets.rb @@ -16,7 +16,6 @@ class Datasets < Base include OpenBEL::Helpers DEFAULT_TYPE = 'application/hal+json' - ACCEPTED_TYPES = { :bel => 'application/bel', :xml => 'application/xml', @@ -24,6 +23,8 @@ class Datasets < Base :json => 'application/json', } + EVIDENCE_BATCH = 500 + def initialize(app) super @@ -233,33 +234,47 @@ def retrieve_dataset(uri) # Create dataset in RDF. @rr.insert_statements(void_dataset) - dataset = retrieve_dataset(void_dataset_uri) + dataset = retrieve_dataset(void_dataset_uri) + dataset_id = dataset[:identifier] + + # Add batches of read evidence objects; save to Mongo and RDF. + # TODO Add JRuby note regarding Enumerator threading. + evidence_batch = [] + BEL.evidence(io, type).each do |ev| + # Standardize annotations from experiment_context. + @annotation_transform.transform_evidence!(ev, base_url) - # Add slices of read evidence objects; save to Mongo and RDF. - BEL.evidence(io, type).each.lazy.each_slice(500) do |slice| - slice.map! do |ev| - # Standardize annotations from experiment_context. - @annotation_transform.transform_evidence!(ev, base_url) + ev.metadata[:dataset] = dataset_id + facets = map_evidence_facets(ev) + ev.bel_statement = ev.bel_statement.to_s + hash = ev.to_h + hash[:facets] = facets + # Create dataset field for efficient removal. + hash[:_dataset] = dataset_id - # Add filterable metadata field for dataset identifier. - ev.metadata[:dataset] = dataset[:identifier] + evidence_batch << hash - facets = map_evidence_facets(ev) - ev.bel_statement = ev.bel_statement.to_s - hash = ev.to_h - hash[:facets] = facets + if evidence_batch.size == EVIDENCE_BATCH + _ids = @api.create_evidence(evidence_batch) - # Create dataset field for efficient removal. - hash[:_dataset] = dataset[:identifier] - hash + dataset_parts = _ids.map { |object_id| + RDF::Statement.new(void_dataset_uri, RDF::DC.hasPart, object_id.to_s) + } + @rr.insert_statements(dataset_parts) + + evidence_batch.clear end + end - _ids = @api.create_evidence(slice) + unless evidence_batch.empty? + _ids = @api.create_evidence(evidence_batch) dataset_parts = _ids.map { |object_id| RDF::Statement.new(void_dataset_uri, RDF::DC.hasPart, object_id.to_s) } @rr.insert_statements(dataset_parts) + + evidence_batch.clear end status 201