From faf5ede097e82ab589e613f5b7d90257ea25ea49 Mon Sep 17 00:00:00 2001 From: Bertrand Zuchuat Date: Thu, 24 Feb 2022 14:46:04 +0100 Subject: [PATCH] oai: fix error on document type processing * Closes #789. Co-Authored-by: Bertrand Zuchuat --- sonar/modules/documents/serializers/dc.py | 10 ++++++++++ sonar/modules/documents/serializers/schemas/dc.py | 11 ++++++----- tests/ui/documents/test_dc_schema.py | 4 ++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/sonar/modules/documents/serializers/dc.py b/sonar/modules/documents/serializers/dc.py index 3a980a6ac..b906e79a2 100644 --- a/sonar/modules/documents/serializers/dc.py +++ b/sonar/modules/documents/serializers/dc.py @@ -17,12 +17,22 @@ """Dublin Core serializer.""" +from invenio_records_rest.serializers.base import PreprocessorMixin from invenio_records_rest.serializers.dc import DublinCoreSerializer class SonarDublinCoreSerializer(DublinCoreSerializer): """Marshmallow based DublinCore serializer for records.""" + @staticmethod + def preprocess_search_hit(pid, record_hit, links_factory=None, **kwargs): + """Prepare a record hit from Elasticsearch for serialization.""" + # TODO: SET VERSION BEFORE PROCESS RECORD. FIND BETTER SOLUTION. + if '_version' not in record_hit: + record_hit['_version'] = 1; + return PreprocessorMixin.preprocess_search_hit( + pid, record_hit, links_factory=None, **kwargs) + def dump(self, obj, context=None): """Serialize object with schema. diff --git a/sonar/modules/documents/serializers/schemas/dc.py b/sonar/modules/documents/serializers/schemas/dc.py index 0d9f150e5..7f3e3450c 100644 --- a/sonar/modules/documents/serializers/schemas/dc.py +++ b/sonar/modules/documents/serializers/schemas/dc.py @@ -249,11 +249,12 @@ def get_titles(self, obj): def get_types(self, obj): """Get types.""" - if obj['metadata'].get('documentType'): - return [ - 'http://purl.org/coar/resource_type/{type}'.format( - type=obj['metadata']['documentType'].split(':')[1]) - ] + if obj['metadata'].get('documentType', ''): + types = obj['metadata'].get('documentType', '').split(':') + if len(types) == 1: + return [f'{types[0]}'] + if len(types) == 2: + return [f'http://purl.org/coar/resource_type/{types[1]}'] return [] diff --git a/tests/ui/documents/test_dc_schema.py b/tests/ui/documents/test_dc_schema.py index ede4065eb..ec71e59d6 100644 --- a/tests/ui/documents/test_dc_schema.py +++ b/tests/ui/documents/test_dc_schema.py @@ -458,6 +458,10 @@ def test_types(minimal_document): result = dc_v1.transform_record(minimal_document['pid'], minimal_document) assert result['types'] == [] + minimal_document['documentType'] = 'advanced_studies_thesis' + result = dc_v1.transform_record(minimal_document['pid'], minimal_document) + assert result['types'] == ['advanced_studies_thesis'] + minimal_document['documentType'] = 'coar:c_2f33' result = dc_v1.transform_record(minimal_document['pid'], minimal_document) assert result['types'] == ['http://purl.org/coar/resource_type/c_2f33']