From 3634ac5b371adbb5467776dd26525fc03c13d866 Mon Sep 17 00:00:00 2001 From: Ihor Sokhan Date: Thu, 23 Apr 2026 21:07:14 +0300 Subject: [PATCH 1/9] added cedar metadata record creation/deletion in share for collection submission --- api/share/utils.py | 41 +++++++++++++++++++++++++++++ osf/models/collection_submission.py | 16 +++++++++++ 2 files changed, 57 insertions(+) diff --git a/api/share/utils.py b/api/share/utils.py index 583b148cb9e..189d3989e2b 100644 --- a/api/share/utils.py +++ b/api/share/utils.py @@ -4,6 +4,7 @@ """ from http import HTTPStatus import logging +from rdflib import Graph from django.apps import apps from celery.utils.time import get_exponential_backoff_interval @@ -14,6 +15,7 @@ from framework.encryption import ensure_bytes from framework.sentry import log_exception from osf.external.gravy_valet.exceptions import GVException +from osf.metadata.rdfutils import OSF from osf.metadata.osf_gathering import ( OsfmapPartition, pls_get_magic_metadata_basket, @@ -64,6 +66,45 @@ def _enqueue_update_share(osfresource): enqueue_task(task__update_share.s(_osfguid_value)) +@celery_app.task() +def share_update_cedar_metadata_record(osf_obj_guid, cedar_record): + referent = osf_obj_guid.referent + + graph = Graph() + full_metadata = { + '@id': referent.get_semantic_iri(), + OSF.hasCedarRecord: cedar_record.metadata, + } + graph.parse(data=full_metadata, format='json-ld') + + serialized_data = graph.serialize(format='turtle') + requests.post( + shtrove_ingest_url(), + params={ + 'record_identifier': f"CedarMetadataRecord:{cedar_record.guid._id}:{cedar_record.template.cedar_id}", + 'is_supplementary': True, + }, + headers={ + 'Content-Type': 'text/turtle; charset=utf-8', + **_shtrove_auth_headers(referent), + }, + data=ensure_bytes(serialized_data), + ) + + +@celery_app.task() +def share_delete_cedar_metadata_record(osf_obj_guid, cedar_record): + referent = osf_obj_guid.referent + + requests.delete( + shtrove_ingest_url(), + params={ + 'record_identifier': f"CedarMetadataRecord:{cedar_record.guid._id}:{cedar_record.template.cedar_id}", + }, + headers=_shtrove_auth_headers(referent), + ) + + @celery_app.task( bind=True, acks_late=True, diff --git a/osf/models/collection_submission.py b/osf/models/collection_submission.py index f2de5ba6610..69b6bf2c69c 100644 --- a/osf/models/collection_submission.py +++ b/osf/models/collection_submission.py @@ -4,11 +4,16 @@ from django.utils import timezone from django.utils.functional import cached_property from framework import sentry +from framework.celery_tasks.handlers import enqueue_task from framework.exceptions import PermissionsError from website.settings import DOMAIN from .base import BaseModel from .mixins import TaxonomizableMixin +from api.share.utils import ( + share_update_cedar_metadata_record, + share_delete_cedar_metadata_record +) from osf.utils.permissions import ADMIN from website.util import api_v2_url from website.search.exceptions import SearchUnavailableError @@ -461,6 +466,17 @@ def update_search(self): # It will automatically determine if a referent is part of the collection update_share(self.guid.referent) + for cedar_record in self.guid.cedar_metadata_records.filter( + is_published=True, + template__should_index_for_search=True + ): + enqueue_task(share_update_cedar_metadata_record.s(self.guid, cedar_record)) + + for cedar_record in self.guid.cedar_metadata_records.filter( + models.Q(is_published=False) | models.Q(template__should_index_for_search=True) + ): + enqueue_task(share_delete_cedar_metadata_record.s(self.guid, cedar_record)) + try: update_collected_metadata(self.guid._id, collection_id=self.collection.id) except SearchUnavailableError as e: From f52c484f6405ff17bde58e0ee4aef8c4d0ef0292 Mon Sep 17 00:00:00 2001 From: Ihor Sokhan Date: Fri, 24 Apr 2026 13:23:20 +0300 Subject: [PATCH 2/9] removed validate_required_metadata redefinition --- osf/models/provider.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/osf/models/provider.py b/osf/models/provider.py index 8984abf9f55..dd32a88ff59 100644 --- a/osf/models/provider.py +++ b/osf/models/provider.py @@ -167,23 +167,6 @@ def update_or_create_from_json(cls, provider_data, user): related_name='required_by_providers', ) - def validate_required_metadata(self, obj): - """ - Raises ValidationError if obj does not have a CedarMetadataRecord for - this provider's required_metadata_template. - Does nothing when required_metadata_template is not set. - """ - if not self.required_metadata_template_id: - return - guid = obj.guids.first() - if guid is None or not guid.cedar_metadata_records.filter( - template_id=self.required_metadata_template_id - ).exists(): - raise ValidationError( - f'Submitted object must have a CEDAR metadata record for template ' - f'"{self.required_metadata_template.schema_name}" to be submitted to this collection.' - ) - def __repr__(self): return ('(name={self.name!r}, default_license={self.default_license!r}, ' 'allow_submissions={self.allow_submissions!r}) with id {self.id!r}').format(self=self) From ec8fb9f265b40f946d7892c8ac158198e10a0ee1 Mon Sep 17 00:00:00 2001 From: Ihor Sokhan Date: Fri, 24 Apr 2026 19:36:24 +0300 Subject: [PATCH 3/9] enqueue cedar record update in share --- api/share/utils.py | 20 ++++++++++++++++---- osf/models/collection_submission.py | 4 ++-- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/api/share/utils.py b/api/share/utils.py index 189d3989e2b..dcea21ca01f 100644 --- a/api/share/utils.py +++ b/api/share/utils.py @@ -67,8 +67,14 @@ def _enqueue_update_share(osfresource): @celery_app.task() -def share_update_cedar_metadata_record(osf_obj_guid, cedar_record): - referent = osf_obj_guid.referent +def share_update_cedar_metadata_record(guid_id, cedar_record_pk): + from osf.models import CedarMetadataRecord, Guid + + guid = Guid.load(guid_id) + referent = guid.referent + cedar_record = CedarMetadataRecord.objects.filter(pk=cedar_record_pk).first() + if not cedar_record: + return graph = Graph() full_metadata = { @@ -93,8 +99,14 @@ def share_update_cedar_metadata_record(osf_obj_guid, cedar_record): @celery_app.task() -def share_delete_cedar_metadata_record(osf_obj_guid, cedar_record): - referent = osf_obj_guid.referent +def share_delete_cedar_metadata_record(guid_id, cedar_record_pk): + from osf.models import CedarMetadataRecord, Guid + + guid = Guid.load(guid_id) + referent = guid.referent + cedar_record = CedarMetadataRecord.objects.filter(pk=cedar_record_pk).first() + if not cedar_record: + return requests.delete( shtrove_ingest_url(), diff --git a/osf/models/collection_submission.py b/osf/models/collection_submission.py index 69b6bf2c69c..f11cb589f8a 100644 --- a/osf/models/collection_submission.py +++ b/osf/models/collection_submission.py @@ -470,12 +470,12 @@ def update_search(self): is_published=True, template__should_index_for_search=True ): - enqueue_task(share_update_cedar_metadata_record.s(self.guid, cedar_record)) + enqueue_task(share_update_cedar_metadata_record.s(self.guid._id, cedar_record.pk)) for cedar_record in self.guid.cedar_metadata_records.filter( models.Q(is_published=False) | models.Q(template__should_index_for_search=True) ): - enqueue_task(share_delete_cedar_metadata_record.s(self.guid, cedar_record)) + enqueue_task(share_delete_cedar_metadata_record.s(self.guid._id, cedar_record.pk)) try: update_collected_metadata(self.guid._id, collection_id=self.collection.id) From 06ef8687d1cb004c7eff54c7434e0e26006ef276 Mon Sep 17 00:00:00 2001 From: Ihor Sokhan Date: Mon, 27 Apr 2026 19:08:35 +0300 Subject: [PATCH 4/9] added focus_iri to shtrove POST request --- api/share/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/api/share/utils.py b/api/share/utils.py index dcea21ca01f..932bbb305d7 100644 --- a/api/share/utils.py +++ b/api/share/utils.py @@ -77,8 +77,9 @@ def share_update_cedar_metadata_record(guid_id, cedar_record_pk): return graph = Graph() + iri = referent.get_semantic_iri() full_metadata = { - '@id': referent.get_semantic_iri(), + '@id': iri, OSF.hasCedarRecord: cedar_record.metadata, } graph.parse(data=full_metadata, format='json-ld') @@ -87,6 +88,7 @@ def share_update_cedar_metadata_record(guid_id, cedar_record_pk): requests.post( shtrove_ingest_url(), params={ + 'focus_iri': iri, 'record_identifier': f"CedarMetadataRecord:{cedar_record.guid._id}:{cedar_record.template.cedar_id}", 'is_supplementary': True, }, From 52c8fedec22dfd80bef1ef77908d7ca07d893c48 Mon Sep 17 00:00:00 2001 From: Ihor Sokhan Date: Thu, 30 Apr 2026 16:58:06 +0300 Subject: [PATCH 5/9] separate cedar record identifier to avoid potential issues --- api/share/utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/api/share/utils.py b/api/share/utils.py index 932bbb305d7..a634cedf1be 100644 --- a/api/share/utils.py +++ b/api/share/utils.py @@ -21,6 +21,7 @@ pls_get_magic_metadata_basket, ) from osf.metadata.serializers import get_metadata_serializer +from osf.models import CedarMetadataRecord from website import settings @@ -89,7 +90,7 @@ def share_update_cedar_metadata_record(guid_id, cedar_record_pk): shtrove_ingest_url(), params={ 'focus_iri': iri, - 'record_identifier': f"CedarMetadataRecord:{cedar_record.guid._id}:{cedar_record.template.cedar_id}", + 'record_identifier': _shtrove_cedar_record_identifier(cedar_record), 'is_supplementary': True, }, headers={ @@ -113,7 +114,7 @@ def share_delete_cedar_metadata_record(guid_id, cedar_record_pk): requests.delete( shtrove_ingest_url(), params={ - 'record_identifier': f"CedarMetadataRecord:{cedar_record.guid._id}:{cedar_record.template.cedar_id}", + 'record_identifier': _shtrove_cedar_record_identifier(cedar_record), }, headers=_shtrove_auth_headers(referent), ) @@ -234,6 +235,10 @@ def _shtrove_record_identifier(osf_item, osfmap_partition: OsfmapPartition): ) +def _shtrove_cedar_record_identifier(cedar_record: CedarMetadataRecord) -> str: + return f'{cedar_record.guid._id}/CedarMetadataRecord:{cedar_record.template.cedar_id}' + + def _shtrove_auth_headers(osf_item): _nonfile_item = ( osf_item.target From 13ccfd34928962f49bceffcdc67fe647cdfc915e Mon Sep 17 00:00:00 2001 From: Ihor Sokhan Date: Thu, 30 Apr 2026 17:06:29 +0300 Subject: [PATCH 6/9] separate cedar record serialization --- api/share/utils.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/api/share/utils.py b/api/share/utils.py index a634cedf1be..73cabe1761c 100644 --- a/api/share/utils.py +++ b/api/share/utils.py @@ -67,6 +67,18 @@ def _enqueue_update_share(osfresource): enqueue_task(task__update_share.s(_osfguid_value)) +def cedar_record_to_turtle(referent, cedar_record): + graph = Graph() + iri = referent.get_semantic_iri() + full_metadata = { + '@id': iri, + OSF.hasCedarRecord: cedar_record.metadata, + } + graph.parse(data=full_metadata, format='json-ld') + + return graph.serialize(format='turtle') + + @celery_app.task() def share_update_cedar_metadata_record(guid_id, cedar_record_pk): from osf.models import CedarMetadataRecord, Guid @@ -77,19 +89,11 @@ def share_update_cedar_metadata_record(guid_id, cedar_record_pk): if not cedar_record: return - graph = Graph() - iri = referent.get_semantic_iri() - full_metadata = { - '@id': iri, - OSF.hasCedarRecord: cedar_record.metadata, - } - graph.parse(data=full_metadata, format='json-ld') - - serialized_data = graph.serialize(format='turtle') + serialized_data = cedar_record_to_turtle(referent, cedar_record) requests.post( shtrove_ingest_url(), params={ - 'focus_iri': iri, + 'focus_iri': referent.get_semantic_iri(), 'record_identifier': _shtrove_cedar_record_identifier(cedar_record), 'is_supplementary': True, }, From b1630a119c79c1b264a7d3957e1434223d8bf575 Mon Sep 17 00:00:00 2001 From: Ihor Sokhan Date: Thu, 30 Apr 2026 21:16:48 +0300 Subject: [PATCH 7/9] added retry, moved tasks to task__update_share --- api/share/utils.py | 93 ++++++++------ osf/models/collection_submission.py | 16 --- osf_tests/test_collection_submission.py | 155 +++++++++++++++++++++++- 3 files changed, 209 insertions(+), 55 deletions(-) diff --git a/api/share/utils.py b/api/share/utils.py index 73cabe1761c..53de276c576 100644 --- a/api/share/utils.py +++ b/api/share/utils.py @@ -7,6 +7,7 @@ from rdflib import Graph from django.apps import apps +from django.db.models import Q from celery.utils.time import get_exponential_backoff_interval import requests @@ -21,7 +22,6 @@ pls_get_magic_metadata_basket, ) from osf.metadata.serializers import get_metadata_serializer -from osf.models import CedarMetadataRecord from website import settings @@ -67,6 +67,29 @@ def _enqueue_update_share(osfresource): enqueue_task(task__update_share.s(_osfguid_value)) +def retry_shtrove_request(self_celery_task, _response): + try: + _response.raise_for_status() + except Exception as e: + log_exception(e) + if _response.status_code == HTTPStatus.TOO_MANY_REQUESTS: + retry_after = _response.headers.get('Retry-After') + try: + countdown = int(retry_after) + except (TypeError, ValueError): + retries = getattr(self_celery_task.request, 'retries', 0) + countdown = get_exponential_backoff_interval( + factor=4, + retries=retries, + maximum=2 * 60, + full_jitter=True, + ) + raise self_celery_task.retry(exc=e, countdown=countdown) + + if HTTPStatus(_response.status_code).is_server_error: + raise self_celery_task.retry(exc=e) + + def cedar_record_to_turtle(referent, cedar_record): graph = Graph() iri = referent.get_semantic_iri() @@ -79,8 +102,8 @@ def cedar_record_to_turtle(referent, cedar_record): return graph.serialize(format='turtle') -@celery_app.task() -def share_update_cedar_metadata_record(guid_id, cedar_record_pk): +@celery_app.task(bind=True) +def share_update_cedar_metadata_record(self, guid_id, cedar_record_pk): from osf.models import CedarMetadataRecord, Guid guid = Guid.load(guid_id) @@ -90,7 +113,7 @@ def share_update_cedar_metadata_record(guid_id, cedar_record_pk): return serialized_data = cedar_record_to_turtle(referent, cedar_record) - requests.post( + response = requests.post( shtrove_ingest_url(), params={ 'focus_iri': referent.get_semantic_iri(), @@ -103,25 +126,26 @@ def share_update_cedar_metadata_record(guid_id, cedar_record_pk): }, data=ensure_bytes(serialized_data), ) + retry_shtrove_request(self, response) -@celery_app.task() -def share_delete_cedar_metadata_record(guid_id, cedar_record_pk): +@celery_app.task(bind=True) +def share_delete_cedar_metadata_record(self, guid_id, cedar_record_pk): from osf.models import CedarMetadataRecord, Guid - guid = Guid.load(guid_id) referent = guid.referent cedar_record = CedarMetadataRecord.objects.filter(pk=cedar_record_pk).first() if not cedar_record: return - requests.delete( + response = requests.delete( shtrove_ingest_url(), params={ 'record_identifier': _shtrove_cedar_record_identifier(cedar_record), }, headers=_shtrove_auth_headers(referent), ) + retry_shtrove_request(self, response) @celery_app.task( @@ -154,36 +178,29 @@ def task__update_share(self, guid: str, is_backfill=False, osfmap_partition_name log_exception(e) raise self.retry(exc=e) - try: - _response.raise_for_status() - except Exception as e: - log_exception(e) - if _response.status_code == HTTPStatus.TOO_MANY_REQUESTS: - retry_after = _response.headers.get('Retry-After') - try: - countdown = int(retry_after) - except (TypeError, ValueError): - retries = getattr(self.request, 'retries', 0) - countdown = get_exponential_backoff_interval( - factor=4, - retries=retries, - maximum=2 * 60, - full_jitter=True, - ) - raise self.retry(exc=e, countdown=countdown) + retry_shtrove_request(self, _response) + # success response + if _is_deletion: + return - if HTTPStatus(_response.status_code).is_server_error: - raise self.retry(exc=e) - else: # success response - if not _is_deletion: - # enqueue followup task for supplementary metadata - _next_partition = _next_osfmap_partition(_osfmap_partition) - if _next_partition is not None: - task__update_share.delay( - guid, - is_backfill=is_backfill, - osfmap_partition_name=_next_partition.name, - ) + # enqueue followup task for supplementary metadata + _next_partition = _next_osfmap_partition(_osfmap_partition) + if _next_partition is not None: + task__update_share.delay( + guid, + is_backfill=is_backfill, + osfmap_partition_name=_next_partition.name, + ) + for cedar_record in _osfid_instance.cedar_metadata_records.filter( + is_published=True, + template__should_index_for_search=True, + ): + enqueue_task(share_update_cedar_metadata_record.s(_osfid_instance._id, cedar_record.pk)) + + for cedar_record in _osfid_instance.cedar_metadata_records.filter( + Q(is_published=False) | Q(template__should_index_for_search=False), + ): + enqueue_task(share_delete_cedar_metadata_record.s(_osfid_instance._id, cedar_record.pk)) def pls_send_trove_record(osf_item, *, is_backfill: bool, osfmap_partition: OsfmapPartition): @@ -239,7 +256,7 @@ def _shtrove_record_identifier(osf_item, osfmap_partition: OsfmapPartition): ) -def _shtrove_cedar_record_identifier(cedar_record: CedarMetadataRecord) -> str: +def _shtrove_cedar_record_identifier(cedar_record) -> str: return f'{cedar_record.guid._id}/CedarMetadataRecord:{cedar_record.template.cedar_id}' diff --git a/osf/models/collection_submission.py b/osf/models/collection_submission.py index f11cb589f8a..f2de5ba6610 100644 --- a/osf/models/collection_submission.py +++ b/osf/models/collection_submission.py @@ -4,16 +4,11 @@ from django.utils import timezone from django.utils.functional import cached_property from framework import sentry -from framework.celery_tasks.handlers import enqueue_task from framework.exceptions import PermissionsError from website.settings import DOMAIN from .base import BaseModel from .mixins import TaxonomizableMixin -from api.share.utils import ( - share_update_cedar_metadata_record, - share_delete_cedar_metadata_record -) from osf.utils.permissions import ADMIN from website.util import api_v2_url from website.search.exceptions import SearchUnavailableError @@ -466,17 +461,6 @@ def update_search(self): # It will automatically determine if a referent is part of the collection update_share(self.guid.referent) - for cedar_record in self.guid.cedar_metadata_records.filter( - is_published=True, - template__should_index_for_search=True - ): - enqueue_task(share_update_cedar_metadata_record.s(self.guid._id, cedar_record.pk)) - - for cedar_record in self.guid.cedar_metadata_records.filter( - models.Q(is_published=False) | models.Q(template__should_index_for_search=True) - ): - enqueue_task(share_delete_cedar_metadata_record.s(self.guid._id, cedar_record.pk)) - try: update_collected_metadata(self.guid._id, collection_id=self.collection.id) except SearchUnavailableError as e: diff --git a/osf_tests/test_collection_submission.py b/osf_tests/test_collection_submission.py index 2b661f00873..d6ab07c004a 100644 --- a/osf_tests/test_collection_submission.py +++ b/osf_tests/test_collection_submission.py @@ -1,3 +1,4 @@ +from unittest import mock import pytest from osf_tests.factories import ( @@ -8,11 +9,12 @@ from osf_tests.factories import NodeFactory, CollectionFactory, CollectionProviderFactory -from osf.models import CollectionSubmission, NotificationTypeEnum +from osf.models import CollectionSubmission, NotificationTypeEnum, CedarMetadataRecord, CedarMetadataTemplate from osf.utils.workflows import CollectionSubmissionStates from framework.exceptions import PermissionsError from api_tests.utils import UserRoles from django.utils import timezone +from website import settings from tests.utils import capture_notifications @@ -147,6 +149,38 @@ def configure_test_auth(node, user_role, provider=None): return user +@pytest.fixture() +def unmoderated_collection_submission_public(node, unmoderated_collection): + unmoderated_collection.is_public = True + unmoderated_collection.save() + collection_submission = CollectionSubmission( + guid=node.guids.first(), + collection=unmoderated_collection, + creator=node.creator, + ) + with capture_notifications(): + collection_submission.save() + assert not collection_submission.is_moderated + return collection_submission + + +@pytest.fixture() +def cedar_template_json(): + return {'t_key_1': 't_value_1', 't_key_2': 't_value_2', 't_key_3': 't_value_3'} + + +@pytest.fixture() +def cedar_template(cedar_template_json): + return CedarMetadataTemplate.objects.create( + schema_name='cedar_test_schema_name', + cedar_id='cedar_test_id', + template_version=1, + template=cedar_template_json, + active=True, + should_index_for_search=True + ) + + @pytest.mark.django_db class TestModeratedCollectionSubmission: @@ -574,3 +608,122 @@ def test_cancel_succeeds(self, node, hybrid_moderated_collection_submission): with capture_notifications(): hybrid_moderated_collection_submission.cancel(user=user, comment='Test Comment') assert hybrid_moderated_collection_submission.state == CollectionSubmissionStates.IN_PROGRESS + + +@pytest.mark.django_db +@pytest.mark.enable_enqueue_task +@mock.patch.object(settings, 'SHARE_ENABLED', True) +@mock.patch.object(settings, 'USE_CELERY', False) # run tasks synchronously +class TestCollectionSubmissionWithCedarRecord: + + @mock.patch('api.share.utils.pls_send_trove_record') + @mock.patch('api.share.utils.share_update_cedar_metadata_record') + @mock.patch('api.share.utils.share_delete_cedar_metadata_record') + def test_unindexable_template_and_unpublished_record_calls_records_deletion( + self, + mock_delete, + mock_create, + mock_pls, + unmoderated_collection_submission_public, + cedar_template, + cedar_template_json + ): + cedar_template.should_index_for_search = False + cedar_template.save() + CedarMetadataRecord.objects.create( + guid=unmoderated_collection_submission_public.guid, + template=cedar_template, + metadata=cedar_template_json, + is_published=False, + ) + obj = mock.Mock() + obj.status_code = 200 + mock_pls.return_value = obj + unmoderated_collection_submission_public.save() + + assert not mock_create.s.called + assert mock_delete.s.called + + @mock.patch('api.share.utils.pls_send_trove_record') + @mock.patch('api.share.utils.share_update_cedar_metadata_record') + @mock.patch('api.share.utils.share_delete_cedar_metadata_record') + def test_indexable_template_and_unpublished_record_calls_records_deletion( + self, + mock_delete, + mock_create, + mock_pls, + unmoderated_collection_submission_public, + cedar_template, + cedar_template_json + ): + cedar_template.should_index_for_search = True + cedar_template.save() + CedarMetadataRecord.objects.create( + guid=unmoderated_collection_submission_public.guid, + template=cedar_template, + metadata=cedar_template_json, + is_published=False, + ) + obj = mock.Mock() + obj.status_code = 200 + mock_pls.return_value = obj + unmoderated_collection_submission_public.save() + + assert not mock_create.s.called + assert mock_delete.s.called + + @mock.patch('api.share.utils.pls_send_trove_record') + @mock.patch('api.share.utils.share_update_cedar_metadata_record') + @mock.patch('api.share.utils.share_delete_cedar_metadata_record') + def test_unindexable_template_and_published_record_calls_records_deletion( + self, + mock_delete, + mock_create, + mock_pls, + unmoderated_collection_submission_public, + cedar_template, + cedar_template_json + ): + cedar_template.should_index_for_search = False + cedar_template.save() + CedarMetadataRecord.objects.create( + guid=unmoderated_collection_submission_public.guid, + template=cedar_template, + metadata=cedar_template_json, + is_published=True, + ) + obj = mock.Mock() + obj.status_code = 200 + mock_pls.return_value = obj + unmoderated_collection_submission_public.save() + + assert not mock_create.s.called + assert mock_delete.s.called + + @mock.patch('api.share.utils.pls_send_trove_record') + @mock.patch('api.share.utils.share_update_cedar_metadata_record') + @mock.patch('api.share.utils.share_delete_cedar_metadata_record') + def test_indexable_template_and_published_record_call_shtrove( + self, + mock_delete, + mock_create, + mock_pls, + unmoderated_collection_submission_public, + cedar_template, + cedar_template_json + ): + cedar_template.should_index_for_search = True + cedar_template.save() + CedarMetadataRecord.objects.create( + guid=unmoderated_collection_submission_public.guid, + template=cedar_template, + metadata=cedar_template_json, + is_published=True, + ) + obj = mock.Mock() + obj.status_code = 200 + mock_pls.return_value = obj + unmoderated_collection_submission_public.save() + + assert mock_create.s.called + assert not mock_delete.s.called From 3561ed0a93c8182c2190cf3d845f8045ec721c53 Mon Sep 17 00:00:00 2001 From: Ihor Sokhan Date: Fri, 1 May 2026 17:56:11 +0300 Subject: [PATCH 8/9] added more tests, make delete cedar record function working without cedar record existence --- api/share/utils.py | 38 +++-- osf_tests/test_collection_submission.py | 198 +++++++++++++++++++++++- 2 files changed, 216 insertions(+), 20 deletions(-) diff --git a/api/share/utils.py b/api/share/utils.py index 53de276c576..91912da9942 100644 --- a/api/share/utils.py +++ b/api/share/utils.py @@ -103,10 +103,10 @@ def cedar_record_to_turtle(referent, cedar_record): @celery_app.task(bind=True) -def share_update_cedar_metadata_record(self, guid_id, cedar_record_pk): - from osf.models import CedarMetadataRecord, Guid +def share_update_cedar_metadata_record(self, referent_id, cedar_record_pk): + from osf.models import Guid, CedarMetadataRecord - guid = Guid.load(guid_id) + guid = Guid.load(referent_id) referent = guid.referent cedar_record = CedarMetadataRecord.objects.filter(pk=cedar_record_pk).first() if not cedar_record: @@ -117,7 +117,7 @@ def share_update_cedar_metadata_record(self, guid_id, cedar_record_pk): shtrove_ingest_url(), params={ 'focus_iri': referent.get_semantic_iri(), - 'record_identifier': _shtrove_cedar_record_identifier(cedar_record), + 'record_identifier': _shtrove_cedar_record_identifier(cedar_record._id, cedar_record.template.cedar_id), 'is_supplementary': True, }, headers={ @@ -130,18 +130,18 @@ def share_update_cedar_metadata_record(self, guid_id, cedar_record_pk): @celery_app.task(bind=True) -def share_delete_cedar_metadata_record(self, guid_id, cedar_record_pk): - from osf.models import CedarMetadataRecord, Guid - guid = Guid.load(guid_id) - referent = guid.referent - cedar_record = CedarMetadataRecord.objects.filter(pk=cedar_record_pk).first() - if not cedar_record: - return - +def share_delete_cedar_metadata_record( + self, + cedar_referent___id, + cedar_record___id, + cedar_template_cedar_id, +): + from osf.models import Guid + referent = Guid.load(cedar_referent___id).referent response = requests.delete( shtrove_ingest_url(), params={ - 'record_identifier': _shtrove_cedar_record_identifier(cedar_record), + 'record_identifier': _shtrove_cedar_record_identifier(cedar_record___id, cedar_template_cedar_id), }, headers=_shtrove_auth_headers(referent), ) @@ -200,7 +200,13 @@ def task__update_share(self, guid: str, is_backfill=False, osfmap_partition_name for cedar_record in _osfid_instance.cedar_metadata_records.filter( Q(is_published=False) | Q(template__should_index_for_search=False), ): - enqueue_task(share_delete_cedar_metadata_record.s(_osfid_instance._id, cedar_record.pk)) + enqueue_task( + share_delete_cedar_metadata_record.s( + cedar_record.guid._id, + cedar_record._id, + cedar_record.template.cedar_id, + ), + ) def pls_send_trove_record(osf_item, *, is_backfill: bool, osfmap_partition: OsfmapPartition): @@ -256,8 +262,8 @@ def _shtrove_record_identifier(osf_item, osfmap_partition: OsfmapPartition): ) -def _shtrove_cedar_record_identifier(cedar_record) -> str: - return f'{cedar_record.guid._id}/CedarMetadataRecord:{cedar_record.template.cedar_id}' +def _shtrove_cedar_record_identifier(cedar_record___id, template_cedar_id) -> str: + return f'{cedar_record___id}/CedarMetadataRecord:{template_cedar_id}' def _shtrove_auth_headers(osf_item): diff --git a/osf_tests/test_collection_submission.py b/osf_tests/test_collection_submission.py index d6ab07c004a..32a33256bb2 100644 --- a/osf_tests/test_collection_submission.py +++ b/osf_tests/test_collection_submission.py @@ -13,6 +13,7 @@ from osf.utils.workflows import CollectionSubmissionStates from framework.exceptions import PermissionsError from api_tests.utils import UserRoles +from api.share.utils import cedar_record_to_turtle, _shtrove_cedar_record_identifier from django.utils import timezone from website import settings @@ -630,7 +631,7 @@ def test_unindexable_template_and_unpublished_record_calls_records_deletion( ): cedar_template.should_index_for_search = False cedar_template.save() - CedarMetadataRecord.objects.create( + record = CedarMetadataRecord.objects.create( guid=unmoderated_collection_submission_public.guid, template=cedar_template, metadata=cedar_template_json, @@ -643,6 +644,11 @@ def test_unindexable_template_and_unpublished_record_calls_records_deletion( assert not mock_create.s.called assert mock_delete.s.called + mock_delete.s.assert_called_with( + record.guid._id, + record._id, + record.template.cedar_id + ) @mock.patch('api.share.utils.pls_send_trove_record') @mock.patch('api.share.utils.share_update_cedar_metadata_record') @@ -658,7 +664,7 @@ def test_indexable_template_and_unpublished_record_calls_records_deletion( ): cedar_template.should_index_for_search = True cedar_template.save() - CedarMetadataRecord.objects.create( + record = CedarMetadataRecord.objects.create( guid=unmoderated_collection_submission_public.guid, template=cedar_template, metadata=cedar_template_json, @@ -671,6 +677,11 @@ def test_indexable_template_and_unpublished_record_calls_records_deletion( assert not mock_create.s.called assert mock_delete.s.called + mock_delete.s.assert_called_with( + record.guid._id, + record._id, + record.template.cedar_id + ) @mock.patch('api.share.utils.pls_send_trove_record') @mock.patch('api.share.utils.share_update_cedar_metadata_record') @@ -686,7 +697,7 @@ def test_unindexable_template_and_published_record_calls_records_deletion( ): cedar_template.should_index_for_search = False cedar_template.save() - CedarMetadataRecord.objects.create( + record = CedarMetadataRecord.objects.create( guid=unmoderated_collection_submission_public.guid, template=cedar_template, metadata=cedar_template_json, @@ -699,6 +710,11 @@ def test_unindexable_template_and_published_record_calls_records_deletion( assert not mock_create.s.called assert mock_delete.s.called + mock_delete.s.assert_called_with( + record.guid._id, + record._id, + record.template.cedar_id + ) @mock.patch('api.share.utils.pls_send_trove_record') @mock.patch('api.share.utils.share_update_cedar_metadata_record') @@ -714,7 +730,7 @@ def test_indexable_template_and_published_record_call_shtrove( ): cedar_template.should_index_for_search = True cedar_template.save() - CedarMetadataRecord.objects.create( + record = CedarMetadataRecord.objects.create( guid=unmoderated_collection_submission_public.guid, template=cedar_template, metadata=cedar_template_json, @@ -726,4 +742,178 @@ def test_indexable_template_and_published_record_call_shtrove( unmoderated_collection_submission_public.save() assert mock_create.s.called + mock_create.s.assert_called_with(unmoderated_collection_submission_public.guid._id, record.pk) assert not mock_delete.s.called + + def test_share_update_cedar_metadata_record(self, unmoderated_collection_submission_public, cedar_template): + metadata = { + '@context': { + 'pav': 'http://purl.org/pav/', + 'url': 'http://schema.org/url', + 'xsd': 'http://www.w3.org/2001/XMLSchema#', + 'name': 'http://schema.org/name', + 'oslc': 'http://open-services.net/ns/core#', + 'rdfs': 'http://www.w3.org/2000/01/rdf-schema#', + 'skos': 'http://www.w3.org/2004/02/skos/core#', + 'author': 'http://schema.org/author', + 'funder': 'https://schema.metadatacenter.org/properties/c35f0660-2072-46a3-8e0d-532e40d94919', + 'schema': 'http://schema.org/', + 'license': 'http://schema.org/license', + 'citation': 'http://schema.org/citation', + 'keywords': 'http://schema.org/keywords', + 'identifier': 'http://schema.org/identifier', + 'rdfs:label': { + '@type': 'xsd:string' + }, + 'description': 'http://schema.org/description', + 'schema:name': { + '@type': 'xsd:string' + }, + 'pav:createdBy': { + '@type': '@id' + }, + 'pav:createdOn': { + '@type': 'xsd:dateTime' + }, + 'skos:notation': { + '@type': 'xsd:string' + }, + 'oslc:modifiedBy': { + '@type': '@id' + }, + 'pav:derivedFrom': { + '@type': '@id' + }, + 'schema:isBasedOn': { + '@type': '@id' + }, + 'variableMeasured': 'http://schema.org/variableMeasured', + 'pav:lastUpdatedOn': { + '@type': 'xsd:dateTime' + }, + 'schema:description': { + '@type': 'xsd:string' + }, + 'About this template': 'https://repo.metadatacenter.org/template-fields/bc66544c-e100-439e-9e80-9b35537368e5' + }, + 'name': { + '@value': 'name' + }, + 'description': { + '@value': 'description' + }, + 'variableMeasured': [ + { + '@value': 'variable' + } + ], + 'author': [ + { + '@value': None + } + ], + 'citation': { + '@value': None + }, + 'license': { + '@value': None + }, + 'funder': [ + { + '@value': '1111111' + } + ], + 'url': {}, + 'keywords': { + '@value': None + }, + 'identifier': {} + } + record = CedarMetadataRecord.objects.create( + guid=unmoderated_collection_submission_public.guid, + template=cedar_template, + metadata=metadata, + is_published=True, + ) + result = cedar_record_to_turtle(record.guid.referent, record) + vocab_url = '' + schema_url = '' + schema_metadata_url = '' + urls_to_find = { + vocab_url: None, + schema_url: None, + schema_metadata_url: None + } + for url in urls_to_find.keys(): + urls_to_find[url] = result[result.index(url) - 3] + + # fetch urls from result and assign ns prefixes based on order of appearance in result to make test resilient to changes in order of namespace declaration in turtle output + ns1 = list(filter(lambda url: urls_to_find[url] == '1', urls_to_find.keys()))[0] + ns2 = list(filter(lambda url: urls_to_find[url] == '2', urls_to_find.keys()))[0] + ns3 = list(filter(lambda url: urls_to_find[url] == '3', urls_to_find.keys()))[0] + vocab_n = urls_to_find[vocab_url] + schema_n = urls_to_find[schema_url] + schema_metadata_n = urls_to_find[schema_metadata_url] + # compose expected result dynamically based on ordering of prefixes + # however ns attributes are strictly attached to specific prefix + expected = ( + f'@prefix ns1: {ns1} .\n' + f'@prefix ns2: {ns2} .\n' + f'@prefix ns3: {ns3} .\n\n' + f' ns{vocab_n}:hasCedarRecord [ ns{schema_n}:description "description" ;\n' + f' ns{schema_n}:identifier [ ] ;\n' + f' ns{schema_n}:name "name" ;\n' + f' ns{schema_n}:url [ ] ;\n' + f' ns{schema_n}:variableMeasured "variable" ;\n' + f' ns{schema_metadata_n}:c35f0660-2072-46a3-8e0d-532e40d94919 "1111111" ] .\n\n' + ) + + assert result == expected + + def test_cedar_record_identifier_on_create(self, unmoderated_collection_submission_public, cedar_template): + cedar_template.should_index_for_search = True + cedar_template.save() + + with mock.patch('api.share.utils.pls_send_trove_record'): + with mock.patch('api.share.utils.share_update_cedar_metadata_record'): + with mock.patch('api.share.utils.share_delete_cedar_metadata_record'): + to_create_record = CedarMetadataRecord.objects.create( + guid=unmoderated_collection_submission_public.guid, + template=cedar_template, + metadata=cedar_template.template, + is_published=True, + ) + + with mock.patch('api.share.utils.pls_send_trove_record'): + with mock.patch('api.share.utils.share_delete_cedar_metadata_record'): + with mock.patch('api.share.utils._shtrove_cedar_record_identifier') as mock_identifier: + unmoderated_collection_submission_public.save() + mock_identifier.assert_called_with( + to_create_record._id, + to_create_record.template.cedar_id + ) + assert ( + _shtrove_cedar_record_identifier(to_create_record._id, to_create_record.template.cedar_id) == + f'{to_create_record._id}/CedarMetadataRecord:{to_create_record.template.cedar_id}' + ) + + def test_cedar_record_identifier_on_delete(self, unmoderated_collection_submission_public, cedar_template): + with mock.patch('api.share.utils.pls_send_trove_record'): + with mock.patch('api.share.utils.share_update_cedar_metadata_record'): + with mock.patch('api.share.utils.share_delete_cedar_metadata_record'): + to_delete_record = CedarMetadataRecord.objects.create( + guid=unmoderated_collection_submission_public.guid, + template=cedar_template, + metadata=cedar_template.template, + is_published=False, + ) + + with mock.patch('api.share.utils.pls_send_trove_record'): + with mock.patch('api.share.utils.share_update_cedar_metadata_record'): + with mock.patch('api.share.utils._shtrove_cedar_record_identifier') as mock_identifier: + unmoderated_collection_submission_public.save() + mock_identifier.assert_called_with(to_delete_record._id, to_delete_record.template.cedar_id) + assert ( + _shtrove_cedar_record_identifier(to_delete_record._id, to_delete_record.template.cedar_id) == + f'{to_delete_record._id}/CedarMetadataRecord:{to_delete_record.template.cedar_id}' + ) From ff4876f603fe5fec1002044ca36523dfe1fce052 Mon Sep 17 00:00:00 2001 From: Ihor Sokhan Date: Mon, 4 May 2026 17:57:14 +0300 Subject: [PATCH 9/9] fixed tests --- api/share/utils.py | 3 +- api_tests/share/test_share_preprint.py | 12 +- osf_tests/test_collection_submission.py | 181 +++++++++++++++++------- 3 files changed, 142 insertions(+), 54 deletions(-) diff --git a/api/share/utils.py b/api/share/utils.py index 91912da9942..b8e919f6f20 100644 --- a/api/share/utils.py +++ b/api/share/utils.py @@ -86,8 +86,7 @@ def retry_shtrove_request(self_celery_task, _response): ) raise self_celery_task.retry(exc=e, countdown=countdown) - if HTTPStatus(_response.status_code).is_server_error: - raise self_celery_task.retry(exc=e) + raise self_celery_task.retry(exc=e) def cedar_record_to_turtle(referent, cedar_record): diff --git a/api_tests/share/test_share_preprint.py b/api_tests/share/test_share_preprint.py index 118abf3105b..95993042532 100644 --- a/api_tests/share/test_share_preprint.py +++ b/api_tests/share/test_share_preprint.py @@ -134,12 +134,20 @@ def test_call_async_update_on_500_failure(self, mock_share_responses, preprint, with expect_preprint_ingest_request(mock_share_responses, preprint, count=5): preprint.update_search() - def test_no_call_async_update_on_400_failure(self, mock_share_responses, preprint, auth): + @mock.patch('api.share.utils.task__update_share.delay') + def test_no_call_async_update_on_400_failure(self, share_delay, mock_share_responses, preprint, auth): with capture_notifications(): mock_share_responses.replace(responses.POST, shtrove_ingest_url(), status=400) preprint.set_published(True, auth=auth, save=True) with expect_preprint_ingest_request(mock_share_responses, preprint, count=1, error_response=True): - preprint.update_search() + try: + preprint.update_search() + except Exception as err: + share_delay.assert_not_called() + assert str(err).startswith("Retry in 180s: HTTPError('400 Client Error:") + assert len(mock_share_responses.calls) == 1 + else: + pytest.fail('Expected Retry(HTTPError) to be raised') def test_delete_from_share(self, mock_share_responses): preprint = PreprintFactory() diff --git a/osf_tests/test_collection_submission.py b/osf_tests/test_collection_submission.py index 32a33256bb2..54d282b7690 100644 --- a/osf_tests/test_collection_submission.py +++ b/osf_tests/test_collection_submission.py @@ -1,5 +1,9 @@ +from contextlib import suppress from unittest import mock import pytest +from requests import Response +from requests.exceptions import HTTPError + from osf_tests.factories import ( AuthUserFactory, @@ -829,12 +833,14 @@ def test_share_update_cedar_metadata_record(self, unmoderated_collection_submiss }, 'identifier': {} } - record = CedarMetadataRecord.objects.create( - guid=unmoderated_collection_submission_public.guid, - template=cedar_template, - metadata=metadata, - is_published=True, - ) + with mock.patch('api.share.utils.pls_send_trove_record'): + record = CedarMetadataRecord.objects.create( + guid=unmoderated_collection_submission_public.guid, + template=cedar_template, + metadata=metadata, + is_published=True, + ) + result = cedar_record_to_turtle(record.guid.referent, record) vocab_url = '' schema_url = '' @@ -870,50 +876,125 @@ def test_share_update_cedar_metadata_record(self, unmoderated_collection_submiss assert result == expected - def test_cedar_record_identifier_on_create(self, unmoderated_collection_submission_public, cedar_template): - cedar_template.should_index_for_search = True - cedar_template.save() + @mock.patch('api.share.utils.pls_send_trove_record') + @mock.patch('api.share.utils.share_delete_cedar_metadata_record') + def test_cedar_record_identifier_on_create(self, mock_delete, mock_pls, unmoderated_collection_submission_public): + template = CedarMetadataTemplate.objects.create(schema_name='http://google.com', cedar_id='http26', template_version=1) + template.should_index_for_search = True + template.save() + with mock.patch('api.share.utils.share_update_cedar_metadata_record'): + to_create_record = CedarMetadataRecord.objects.create( + guid=unmoderated_collection_submission_public.guid, + template=template, + metadata=template.template, + is_published=True, + ) - with mock.patch('api.share.utils.pls_send_trove_record'): - with mock.patch('api.share.utils.share_update_cedar_metadata_record'): - with mock.patch('api.share.utils.share_delete_cedar_metadata_record'): - to_create_record = CedarMetadataRecord.objects.create( - guid=unmoderated_collection_submission_public.guid, - template=cedar_template, - metadata=cedar_template.template, - is_published=True, - ) + with mock.patch('api.share.utils.requests.post'): + with mock.patch('api.share.utils._shtrove_cedar_record_identifier') as mock_identifier: + unmoderated_collection_submission_public.save() + mock_identifier.assert_called_with( + to_create_record._id, + to_create_record.template.cedar_id + ) + assert ( + _shtrove_cedar_record_identifier(to_create_record._id, to_create_record.template.cedar_id) == + f'{to_create_record._id}/CedarMetadataRecord:http26' + ) - with mock.patch('api.share.utils.pls_send_trove_record'): - with mock.patch('api.share.utils.share_delete_cedar_metadata_record'): - with mock.patch('api.share.utils._shtrove_cedar_record_identifier') as mock_identifier: - unmoderated_collection_submission_public.save() - mock_identifier.assert_called_with( - to_create_record._id, - to_create_record.template.cedar_id - ) - assert ( - _shtrove_cedar_record_identifier(to_create_record._id, to_create_record.template.cedar_id) == - f'{to_create_record._id}/CedarMetadataRecord:{to_create_record.template.cedar_id}' - ) - - def test_cedar_record_identifier_on_delete(self, unmoderated_collection_submission_public, cedar_template): - with mock.patch('api.share.utils.pls_send_trove_record'): - with mock.patch('api.share.utils.share_update_cedar_metadata_record'): - with mock.patch('api.share.utils.share_delete_cedar_metadata_record'): - to_delete_record = CedarMetadataRecord.objects.create( - guid=unmoderated_collection_submission_public.guid, - template=cedar_template, - metadata=cedar_template.template, - is_published=False, - ) + @mock.patch('api.share.utils.pls_send_trove_record') + @mock.patch('api.share.utils.share_update_cedar_metadata_record') + def test_cedar_record_identifier_on_delete(self, mock_update, mock_pls, unmoderated_collection_submission_public): + template = CedarMetadataTemplate.objects.create(schema_name='http://google.com', cedar_id='http25', template_version=1) + with mock.patch('api.share.utils.share_delete_cedar_metadata_record'): + to_delete_record = CedarMetadataRecord.objects.create( + guid=unmoderated_collection_submission_public.guid, + template=template, + metadata=template.template, + is_published=False, + ) - with mock.patch('api.share.utils.pls_send_trove_record'): - with mock.patch('api.share.utils.share_update_cedar_metadata_record'): - with mock.patch('api.share.utils._shtrove_cedar_record_identifier') as mock_identifier: - unmoderated_collection_submission_public.save() - mock_identifier.assert_called_with(to_delete_record._id, to_delete_record.template.cedar_id) - assert ( - _shtrove_cedar_record_identifier(to_delete_record._id, to_delete_record.template.cedar_id) == - f'{to_delete_record._id}/CedarMetadataRecord:{to_delete_record.template.cedar_id}' - ) + with mock.patch('api.share.utils.requests.delete'): + with mock.patch('api.share.utils._shtrove_cedar_record_identifier') as mock_identifier: + unmoderated_collection_submission_public.save() + mock_identifier.assert_called_with(to_delete_record._id, to_delete_record.template.cedar_id) + assert ( + _shtrove_cedar_record_identifier(to_delete_record._id, to_delete_record.template.cedar_id) == + f'{to_delete_record._id}/CedarMetadataRecord:http25' + ) + + @mock.patch('api.share.utils.share_update_cedar_metadata_record') + @mock.patch('api.share.utils.share_delete_cedar_metadata_record') + def test_cedar_record_create_retry( + self, + mock_delete, + mock_create, + unmoderated_collection_submission_public, + cedar_template, + cedar_template_json + ): + cedar_template.should_index_for_search = True + cedar_template.save() + with mock.patch('api.share.utils.pls_send_trove_record') as mock_pls: + mock_pls.return_value = Response() + mock_pls.return_value.status_code = 400 + with suppress(Exception): + CedarMetadataRecord.objects.create( + guid=unmoderated_collection_submission_public.guid, + template=cedar_template, + metadata=cedar_template_json, + is_published=True, + ) + + def mock_raise_for_status(*args, **kwargs): + raise HTTPError('Retry error') + + mock_pls.return_value = Response() + mock_pls.return_value.status_code = 400 + mock_pls.return_value.raise_for_status = mock_raise_for_status + try: + unmoderated_collection_submission_public.save() + except Exception as err: + assert str(err) == "Retry in 180s: HTTPError('Retry error')" + assert not mock_create.s.called + assert not mock_delete.s.called + else: + pytest.fail('Expected Retry(HTTPError) to be raised') + + @mock.patch('api.share.utils.share_update_cedar_metadata_record') + @mock.patch('api.share.utils.share_delete_cedar_metadata_record') + def test_cedar_record_delete_retry( + self, + mock_delete, + mock_create, + unmoderated_collection_submission_public, + cedar_template, + cedar_template_json + ): + cedar_template.should_index_for_search = False + cedar_template.save() + with mock.patch('api.share.utils.pls_send_trove_record') as mock_pls: + mock_pls.return_value = Response() + mock_pls.return_value.status_code = 400 + with suppress(Exception): + CedarMetadataRecord.objects.create( + guid=unmoderated_collection_submission_public.guid, + template=cedar_template, + metadata=cedar_template_json, + is_published=True, + ) + + def mock_raise_for_status(*args, **kwargs): + raise HTTPError('Retry error') + + mock_pls.return_value = Response() + mock_pls.return_value.status_code = 400 + mock_pls.return_value.raise_for_status = mock_raise_for_status + try: + unmoderated_collection_submission_public.save() + except Exception as err: + assert str(err) == "Retry in 180s: HTTPError('Retry error')" + assert not mock_create.s.called + assert not mock_delete.s.called + else: + pytest.fail('Expected Retry(HTTPError) to be raised')