Skip to content

Commit

Permalink
Fix large exports failing
Browse files Browse the repository at this point in the history
closes pulp#5375
  • Loading branch information
quba42 authored and hstct committed May 15, 2024
1 parent b26e8b5 commit 9d8db20
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 15 deletions.
1 change: 1 addition & 0 deletions CHANGES/5375.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed exports of large amounts of content failing because of changes in psycopg.
30 changes: 17 additions & 13 deletions pulpcore/app/importexport.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from django.db.models.query import QuerySet

from pulpcore.app.apps import get_plugin_config
from pulpcore.app.models.content import Artifact
from pulpcore.app.models.progress import ProgressReport
from pulpcore.app.models.repository import Repository
from pulpcore.app.modelresource import (
Expand Down Expand Up @@ -56,17 +57,17 @@ def process_batch(batch):

batch = []
needs_comma = False
for item in resource.queryset.iterator(chunk_size=EXPORT_BATCH_SIZE):
batch.append(item)
offset = 0
while True:
batch = list(resource.queryset[offset : offset + EXPORT_BATCH_SIZE])
if not batch:
break

if needs_comma:
# Write "," if not last loop
temp_file.write(", ")
needs_comma = False

if len(batch) >= EXPORT_BATCH_SIZE:
process_batch(batch)
batch.clear()
needs_comma = True
process_batch(batch)
needs_comma = True
offset += EXPORT_BATCH_SIZE

if batch:
process_batch(batch)
Expand Down Expand Up @@ -102,17 +103,19 @@ def export_versions(export, version_info):
export.tarfile.addfile(info, io.BytesIO(version_json))


def export_artifacts(export, artifacts):
def export_artifacts(export, artifact_pks):
"""
Export a set of Artifacts, ArtifactResources, and RepositoryResources
Args:
export (django.db.models.PulpExport): export instance that's doing the export
artifacts (django.db.models.Artifacts): List of artifacts in all repos being exported
artifact_pks (django.db.models.Artifacts): List of artifact_pks in all repos being exported
Raises:
ValidationError: When path is not in the ALLOWED_EXPORT_PATHS setting
"""
artifacts = Artifact.objects.filter(pk__in=artifact_pks)
artifact_files = Artifact.objects.filter(pk__in=artifact_pks).values_list("file", flat=True)
data = dict(message="Exporting Artifacts", code="export.artifacts", total=len(artifacts))
with ProgressReport(**data) as pb:
pb.BATCH_INTERVAL = 5000
Expand All @@ -130,8 +133,9 @@ def export_artifacts(export, artifacts):
artifact.file.close()
export.tarfile.add(temp_file.name, artifact.file.name)
else:
for artifact in pb.iter(artifacts.only("file").iterator()):
export.tarfile.add(artifact.file.path, artifact.file.name)
for artifact_file in pb.iter(artifact_files):
artifact_file_path = os.path.join("/var/lib/pulp/media/", artifact_file)
export.tarfile.add(artifact_file_path, artifact_file)

resource = ArtifactResource()
resource.queryset = artifacts
Expand Down
4 changes: 2 additions & 2 deletions pulpcore/app/tasks/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
RepositoryVersion,
Task,
)
from pulpcore.app.models.content import Artifact, ContentArtifact
from pulpcore.app.models.content import ContentArtifact
from pulpcore.app.serializers import PulpExportSerializer

from pulpcore.app.util import compute_file_hash, Crc32Hasher
Expand Down Expand Up @@ -509,7 +509,7 @@ def _do_export(pulp_exporter, tar, the_export):

# Export the top-level entities (artifacts and repositories)
# Note: we've already handled "what about incrementals" when building the 'artifacts' list
export_artifacts(the_export, Artifact.objects.filter(pk__in=artifact_pks))
export_artifacts(the_export, artifact_pks)
del artifact_pks

# Export the repository-version data, per-version
Expand Down

0 comments on commit 9d8db20

Please sign in to comment.