Commit

OpenConceptLab/ocl_issues#910 | Export queries | limit/offset on lookup table only
snyaggarwal committed Aug 13, 2021
1 parent de1e009 commit 9c12c81
Showing 1 changed file with 53 additions and 53 deletions.
106 changes: 53 additions & 53 deletions core/common/utils.py
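In outline, the commit replaces LIMIT/OFFSET slicing of the full Concept/Mapping querysets with slicing of the narrow source-to-concept lookup (through) table, resolving each batch of ids back to full rows afterwards. A minimal sketch of that pattern, using the model and field names from the diff below (the import path, helper name, and batch size are illustrative assumptions, not part of the commit):

# Sketch of the batching pattern this commit adopts; assumes the Django models
# referenced in the diff below. Import path and helper name are illustrative only.
from core.concepts.models import Concept  # assumed location of the Concept model


def iter_concept_batches(version, filters, batch_size=1000):
    # LIMIT/OFFSET is applied to the narrow lookup (through) table only...
    concepts_qs = Concept.sources.through.objects.filter(source_id=version.id)
    start, end = 0, batch_size
    batch = concepts_qs.order_by('-concept_id')[start:end]
    while batch.exists():
        # ...and each batch of ids is then resolved to full Concept rows,
        # with the export filters applied at this point.
        yield Concept.objects.filter(
            id__in=batch.values_list('concept_id')
        ).filter(**filters)
        start += batch_size
        end += batch_size
        batch = concepts_qs.order_by('-concept_id')[start:end]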
@@ -206,7 +206,7 @@ def write_export_file(
resource_string = json.dumps(data, cls=encoders.JSONEncoder)
logger.info('Done serializing attributes.')

batch_size = 100
batch_size = 2

@rkorytkowski (Contributor) commented on Aug 13, 2021:
Is this a typo? Did you mean 200?

@snyaggarwal (Author, Contributor) commented on Aug 13, 2021:
bad typo!

is_collection = resource_type == 'collection'

if is_collection:
@@ -216,51 +216,44 @@ def write_export_file(
concepts_qs = Concept.sources.through.objects.filter(source_id=version.id)
mappings_qs = Mapping.sources.through.objects.filter(source_id=version.id)

all_concepts = Concept.objects.filter(id__in=concepts_qs.values('concept_id'))
all_mappings = Mapping.objects.filter(id__in=mappings_qs.values('mapping_id'))

filters = dict()

if not is_collection:
filters['is_active'] = True
if version.is_head:
filters['is_latest_version'] = True

if filters:
all_concepts = all_concepts.filter(**filters)
all_mappings = all_mappings.filter(**filters)

total_concepts = all_concepts.count()
total_mappings = all_mappings.count()

def get_batch(queryset, offset, limit):
_queryset = queryset.order_by('-id')
if filters:
_queryset = _queryset.filter(**filters)
return _queryset[offset:limit]

with open('export.json', 'w') as out:
out.write('%s, "concepts": [' % resource_string[:-1])

resource_name = resource_type.title()

if total_concepts:
logger.info(
'%s has %d concepts. Getting them in batches of %d...' % (resource_name, total_concepts, batch_size)
)
if concepts_qs.exists():
logger.info('%s has concepts. Getting them in batches of %d...' % (resource_name, batch_size))
concept_serializer_class = get_class('core.concepts.serializers.ConceptVersionExportSerializer')
for start in range(0, total_concepts, batch_size):
end = min(start + batch_size, total_concepts)
logger.info('Serializing concepts %d - %d...' % (start+1, end))
concept_versions = get_batch(all_concepts, start, end).prefetch_related('names', 'descriptions')
concept_serializer = concept_serializer_class(concept_versions, many=True)
concept_data = concept_serializer.data
concept_string = json.dumps(concept_data, cls=encoders.JSONEncoder)
concept_string = concept_string[1:-1]
with open('export.json', 'a') as out:
out.write(concept_string)
if end != total_concepts:
out.write(', ')
start = 0
end = batch_size
batch_queryset = concepts_qs.order_by('-concept_id')[start:end]

while batch_queryset.exists():
logger.info('Serializing concepts %d - %d...' % (start + 1, end))
queryset = Concept.objects.filter(id__in=batch_queryset.values_list('concept_id')).filter(**filters)

@rkorytkowski (Contributor) commented on Aug 13, 2021:
Do we care about order in the output file? It's not expensive to order it here as well...

@snyaggarwal (Author, Contributor) commented on Aug 13, 2021:
I don't think we care about the order; I can still add order_by('-id').
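For illustration, applying the suggested ordering would be a one-line change to the id-resolution query above (a hypothetical variant, not part of this commit):

# Hypothetical variant of the line above with the suggested ordering applied.
queryset = Concept.objects.filter(
    id__in=batch_queryset.values_list('concept_id')
).filter(**filters).order_by('-id')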

if queryset.exists():
if start > 0:
with open('export.json', 'a') as out:
out.write(', ')
concept_versions = queryset.prefetch_related('names', 'descriptions')
data = concept_serializer_class(concept_versions, many=True).data
concept_string = json.dumps(data, cls=encoders.JSONEncoder)
concept_string = concept_string[1:-1]

with open('export.json', 'a') as out:
out.write(concept_string)

start += batch_size
end += batch_size
batch_queryset = concepts_qs.order_by('-concept_id')[start:end]

logger.info('Done serializing concepts.')
else:
logger.info('%s has no concepts to serialize.' % resource_name)
@@ -295,27 +288,34 @@ def get_batch(queryset, offset, limit):
with open('export.json', 'a') as out:
out.write('], "mappings": [')

if total_mappings:
logger.info(
'%s has %d mappings. Getting them in batches of %d...' % (resource_name, total_mappings, batch_size)
)
if mappings_qs.exists():
logger.info('%s has mappings. Getting them in batches of %d...' % (resource_name, batch_size))
mapping_serializer_class = get_class('core.mappings.serializers.MappingDetailSerializer')
for start in range(0, total_mappings, batch_size):
end = min(start + batch_size, total_mappings)
logger.info('Serializing mappings %d - %d...' % (start+1, end))
mappings = get_batch(all_mappings, start, end).select_related(
'from_concept', 'to_concept',
'from_source__organization', 'from_source__user',
'to_source__organization', 'to_source__user'
)
reference_serializer = mapping_serializer_class(mappings, many=True)
reference_data = reference_serializer.data
reference_string = json.dumps(reference_data, cls=encoders.JSONEncoder)
reference_string = reference_string[1:-1]
with open('export.json', 'a') as out:
out.write(reference_string)
if end != total_mappings:
out.write(', ')
start = 0
end = batch_size
batch_queryset = mappings_qs.order_by('-mapping_id')[start:end]

while batch_queryset.exists():
logger.info('Serializing mappings %d - %d...' % (start + 1, start + batch_size))
queryset = Mapping.objects.filter(id__in=batch_queryset.values_list('mapping_id')).filter(**filters)

@rkorytkowski (Contributor) commented on Aug 13, 2021:
"order by" here as well?

if queryset.exists():
if start > 0:
with open('export.json', 'a') as out:
out.write(', ')

mapping_versions = queryset.select_related(
'from_concept', 'to_concept', 'from_source__organization', 'from_source__user',
'to_source__organization', 'to_source__user')
data = mapping_serializer_class(mapping_versions, many=True).data
mapping_string = json.dumps(data, cls=encoders.JSONEncoder)
mapping_string = mapping_string[1:-1]
with open('export.json', 'a') as out:
out.write(mapping_string)

start += batch_size
end += batch_size
batch_queryset = mappings_qs.order_by('-mapping_id')[start:end]

@rkorytkowski (Contributor) commented on Aug 13, 2021:
One more point here. I don't know the exact inner workings, but we may want to recreate both querysets on each batch fetch in order to free up the cache. See https://docs.djangoproject.com/en/1.8/topics/db/queries/#caching-and-querysets

I assume that slicing creates a new queryset, but it clones the cache along. Is that right? Could you please verify?

@rkorytkowski (Contributor) commented on Aug 13, 2021:
Actually, I found this sentence: "Specifically, this means that limiting the queryset using an array slice or an index will not populate the cache." So we are safe here.
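For reference, a small sketch of the behaviour being relied on (Django ORM; names taken from the diff, the batch size and assert are illustrative): slicing an unevaluated queryset builds a fresh QuerySet each time, so no result cache accumulates on the parent:

# Illustrative sketch of the queryset-caching point above (not part of the commit).
# `mappings_qs` is the unevaluated lookup queryset from the diff; each slice is a
# brand-new QuerySet with its own LIMIT/OFFSET, so the parent never caches results.
base = mappings_qs.order_by('-mapping_id')
first_batch = base[0:1000]      # new QuerySet, evaluated only when iterated
second_batch = base[1000:2000]  # another new QuerySet; `base` itself is never evaluated
assert first_batch is not base and second_batch is not base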


logger.info('Done serializing mappings.')
else:
logger.info('%s has no mappings to serialize.' % resource_name)
