Skip to content

Commit

Permalink
sharev2_elastic format: handle "deleted" subjects
Browse files Browse the repository at this point in the history
when OSF pushes metadata containing subjects, it will often include
"deleted" subjects that should be removed -- because SHARE historically
didn't understand what it meant to remove relations, and we added a
special-cased hack to allow removing subjects.

this is (will be) no longer necessary using FormattedMetadataRecords,
since older metadata will be replaced by, not merged with, new updates
  • Loading branch information
aaxelb committed Feb 4, 2021
1 parent 4b2f25c commit fec9537
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 5 deletions.
18 changes: 13 additions & 5 deletions share/metadata_formats/sharev2_elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,15 +141,23 @@ def _get_related_agent_names(self, work_node, relation_types):

def _get_subjects(self, work_node, source_name):
return [
self._serialize_subject(subject_node, source_name)
for subject_node in work_node['subjects']
self._serialize_subject(through_subject['subject'], source_name)
for through_subject in work_node['subject_relations']
if (
not through_subject['is_deleted']
and not through_subject['subject']['is_deleted']
)
]

def _get_subject_synonyms(self, work_node):
return [
self._serialize_subject(subject_node['central_synonym'])
for subject_node in work_node['subjects']
if subject_node['central_synonym']
self._serialize_subject(through_subject['subject']['central_synonym'])
for through_subject in work_node['subject_relations']
if (
not through_subject['is_deleted']
and not through_subject['subject']['is_deleted']
and through_subject['subject']['central_synonym']
)
]

def _serialize_subject(self, subject_node, source_name=None):
Expand Down
108 changes: 108 additions & 0 deletions tests/share/metadata_formats/test_sharev2_elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,107 @@
'@type': 'registration'
}
},
{
'@id': '_:through-subj-architecture',
'@type': 'throughsubjects',
'creative_work': {
'@id': '_:4058232c-106f-4a2f-8700-d8c14a6c6ece',
'@type': 'registration'
},
'subject': {
'@id': '_:subj-architecture',
'@type': 'subject'
},
},
{
'@id': '_:subj-architecture',
'@type': 'subject',
'name': 'Architecture',
},
{
'@id': '_:through-subj-business',
'@type': 'throughsubjects',
'is_deleted': True, # back-compat with a prior hack
'creative_work': {
'@id': '_:4058232c-106f-4a2f-8700-d8c14a6c6ece',
'@type': 'registration'
},
'subject': {
'@id': '_:subj-business',
'@type': 'subject'
},
},
{
'@id': '_:subj-business',
'@type': 'subject',
'name': 'Business',
},
{
'@id': '_:through-subj-education',
'@type': 'throughsubjects',
'creative_work': {
'@id': '_:4058232c-106f-4a2f-8700-d8c14a6c6ece',
'@type': 'registration'
},
'subject': {
'@id': '_:subj-education',
'@type': 'subject'
},
},
{
'@id': '_:subj-education',
'@type': 'subject',
'name': 'Education',
'is_deleted': True, # back-compat with a prior hack
},
{
'@id': '_:through-subj-custom-biology',
'@type': 'throughsubjects',
'creative_work': {
'@id': '_:4058232c-106f-4a2f-8700-d8c14a6c6ece',
'@type': 'registration'
},
'subject': {
'@id': '_:subj-custom-biology',
'@type': 'subject'
},
},
{
'@id': '_:subj-custom-biology',
'@type': 'subject',
'name': 'Custom biologyyyy',
'parent': {
'@id': '_:subj-custom-life-sciences',
'@type': 'subject',
},
'central_synonym': {
'@id': '_:subj-central-biology',
'@type': 'subject',
},
},
{
'@id': '_:subj-custom-life-sciences',
'@type': 'subject',
'name': 'Custom life sciencesssss',
'central_synonym': {
'@id': '_:subj-central-life-sciences',
'@type': 'subject',
},
},
{
'@id': '_:subj-central-biology',
'@type': 'subject',
'name': 'Biology',
'parent': {
'@id': '_:subj-central-life-sciences',
'@type': 'subject',
},
},
{
'@id': '_:subj-central-life-sciences',
'@type': 'subject',
'name': 'Life Sciences',
},
],
},
},
Expand All @@ -257,6 +358,13 @@
'registration_type': 'Open-Ended Registration',
'retracted': False,
'sources': ['osf reg'],
'subject_synonyms': [
'bepress|Life Sciences|Biology',
],
'subjects': [
'bepress|Architecture',
'osf reg|Custom life sciencesssss|Custom biologyyyy',
],
'title': 'Assorted chair',
'type': 'registration',
'types': ['registration', 'publication', 'creative work'],
Expand Down

0 comments on commit fec9537

Please sign in to comment.