From 490fe5f04794d838e322fd4482b5b1035aaed777 Mon Sep 17 00:00:00 2001 From: Quirin Pamp Date: Wed, 15 May 2024 15:03:30 +0200 Subject: [PATCH] WIP: Fix large exports --- pulpcore/app/importexport.py | 12 ++++++++---- pulpcore/app/tasks/export.py | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pulpcore/app/importexport.py b/pulpcore/app/importexport.py index 35deeefd3e..97ccb9742d 100644 --- a/pulpcore/app/importexport.py +++ b/pulpcore/app/importexport.py @@ -9,6 +9,7 @@ from django.db.models.query import QuerySet from pulpcore.app.apps import get_plugin_config +from pulpcore.app.models.content import Artifact from pulpcore.app.models.progress import ProgressReport from pulpcore.app.models.repository import Repository from pulpcore.app.modelresource import ( @@ -102,17 +103,19 @@ def export_versions(export, version_info): export.tarfile.addfile(info, io.BytesIO(version_json)) -def export_artifacts(export, artifacts): +def export_artifacts(export, artifact_pks): """ Export a set of Artifacts, ArtifactResources, and RepositoryResources Args: export (django.db.models.PulpExport): export instance that's doing the export - artifacts (django.db.models.Artifacts): List of artifacts in all repos being exported + artifact_pks (iterable): Primary keys of the Artifacts in all repos being exported Raises: ValidationError: When path is not in the ALLOWED_EXPORT_PATHS setting """ + artifacts = Artifact.objects.filter(pk__in=artifact_pks) + artifact_files = Artifact.objects.filter(pk__in=artifact_pks).values_list("file", flat=True) data = dict(message="Exporting Artifacts", code="export.artifacts", total=len(artifacts)) with ProgressReport(**data) as pb: pb.BATCH_INTERVAL = 5000 @@ -130,8 +133,9 @@ def export_artifacts(export, artifacts): artifact.file.close() export.tarfile.add(temp_file.name, artifact.file.name) else: - for artifact in pb.iter(artifacts.only("file").iterator()): - export.tarfile.add(artifact.file.path, artifact.file.name) + for 
artifact_file in pb.iter(artifact_files): + artifact_file_path = os.path.join('/var/lib/pulp/media/', artifact_file) + export.tarfile.add(artifact_file_path, artifact_file) resource = ArtifactResource() resource.queryset = artifacts diff --git a/pulpcore/app/tasks/export.py b/pulpcore/app/tasks/export.py index 57e23caef9..fb4f06d6c9 100644 --- a/pulpcore/app/tasks/export.py +++ b/pulpcore/app/tasks/export.py @@ -509,7 +509,7 @@ def _do_export(pulp_exporter, tar, the_export): # Export the top-level entities (artifacts and repositories) # Note: we've already handled "what about incrementals" when building the 'artifacts' list - export_artifacts(the_export, Artifact.objects.filter(pk__in=artifact_pks)) + export_artifacts(the_export, artifact_pks) del artifact_pks # Export the repository-version data, per-version