From 5b8c77a1a91b68fd951538cdb153405f2f4bc9fb Mon Sep 17 00:00:00 2001 From: Sherif Abdelhamid Date: Mon, 8 May 2017 11:44:53 -0400 Subject: [PATCH 1/2] [SHARE-633][Fix] Datacite.org parse description and tags * Update datacite source.yaml * Fix description and tags parsing --- share/sources/org.datacite/source.yaml | 1 + share/transformers/org_datacite.py | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/share/sources/org.datacite/source.yaml b/share/sources/org.datacite/source.yaml index 790d5312e..c5cbe6cfe 100644 --- a/share/sources/org.datacite/source.yaml +++ b/share/sources/org.datacite/source.yaml @@ -27,6 +27,7 @@ configs: namespaces: 'http://purl.org/dc/elements/1.1/': dc 'http://datacite.org/schema/kernel-3': null + 'http://datacite.org/schema/kernel-4': null 'http://www.openarchives.org/OAI/2.0/': null 'http://schema.datacite.org/oai/oai-1.0/': null 'http://www.openarchives.org/OAI/2.0/oai_dc/': null diff --git a/share/transformers/org_datacite.py b/share/transformers/org_datacite.py index 0f9b2f946..9d67b63f3 100644 --- a/share/transformers/org_datacite.py +++ b/share/transformers/org_datacite.py @@ -62,6 +62,10 @@ def force_text(data): if data is None: return '' if isinstance(data, dict): + if 'description' in data: + if '#text' in data['description']: + return data['description']['#text'] + raise Exception('description is not in {}'.format(data)) if '#text' in data: return data['#text'] raise Exception('#text is not in {}'.format(data)) @@ -490,7 +494,7 @@ def get_schema(self, type): ) description = tools.RunPython( force_text, - tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.descriptions.description[0]) + tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.descriptions) ) rights = tools.Try( @@ -551,12 +555,12 @@ def get_schema(self, type): tags = tools.Map( tools.Delegate(ThroughTags), tools.RunPython( - force_text, + 'text_list', tools.Concat( tools.Maybe(tools.Maybe(ctx.record, 'metadata')['oai_datacite'], 'type'), tools.RunPython( 'text_list', - (tools.Concat(tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.subjects.subject))) + tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.subjects.subject) ), tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.formats.format), tools.Try(ctx.record.metadata['oai_datacite'].datacentreSymbol), From 83ff714d94e209126045bf6fbe7cd61727935a4f Mon Sep 17 00:00:00 2001 From: Sherif Abdelhamid Date: Tue, 9 May 2017 18:11:45 -0400 Subject: [PATCH 2/2] Remove use of concat --- share/transformers/org_datacite.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/share/transformers/org_datacite.py b/share/transformers/org_datacite.py index 4c86c7fbe..f1bf5c60d 100644 --- a/share/transformers/org_datacite.py +++ b/share/transformers/org_datacite.py @@ -546,9 +546,7 @@ def get_schema(self, type): tools.Subjects( tools.RunPython( 'text_list', - tools.Concat( - tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.subjects.subject), - ) + tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.subjects.subject) ) ) )