Skip to content

Commit

Permalink
fix: handle names consistently when formatting
Browse files Browse the repository at this point in the history
- if no `cited_as` and no `name`, make some cultural assumptions and
  try building a name from parts (TODO in the future: stop using name
  parts at all)
- correct the ordering of contributors for oai_dc
  • Loading branch information
aaxelb committed Mar 31, 2021
1 parent 122f953 commit 2b498ac
Show file tree
Hide file tree
Showing 6 changed files with 125 additions and 7 deletions.
14 changes: 10 additions & 4 deletions share/metadata_formats/oai_dc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from lxml import etree

from share.util.graph import MutableGraph
from share.util.names import get_related_agent_name

from share.oaipmh.util import format_datetime, ns, nsmap, SubEl
from share.metadata_formats.base import MetadataFormatter
Expand Down Expand Up @@ -87,7 +88,10 @@ def build_dublin_core(self, work_node):

def _get_related_agent_names(self, work_node, relation_types):
def sort_key(relation_node):
return relation_node['order_cited'] or 9999999 # those without order_cited go last
order_cited = relation_node['order_cited']
if order_cited is None:
return 9999999 # those without order_cited go last
return int(order_cited)

relation_nodes = sorted(
[
Expand All @@ -97,10 +101,12 @@ def sort_key(relation_node):
],
key=sort_key,
)
return [
relation['cited_as'] or relation['agent']['name']

# remove falsy values
return filter(None, [
get_related_agent_name(relation)
for relation in relation_nodes
]
])

def _get_related_uris(self, work_node):
related_work_uris = set()
Expand Down
5 changes: 3 additions & 2 deletions share/metadata_formats/sharev2_elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from django.conf import settings

from share.util.graph import MutableGraph
from share.util.names import get_related_agent_name
from share.util import IDObfuscator

from .base import MetadataFormatter
Expand Down Expand Up @@ -135,7 +136,7 @@ def format(self, normalized_datum):

def _get_related_agent_names(self, work_node, relation_types):
return [
relation_node['cited_as'] or relation_node['agent']['name']
get_related_agent_name(relation_node)
for relation_node in work_node['agent_relations']
if relation_node.type in relation_types
]
Expand Down Expand Up @@ -181,7 +182,7 @@ def _build_list_agent(self, relation_node):
return {
'type': format_node_type(agent_node),
'types': format_node_type_lineage(agent_node),
'name': agent_node['name'] or relation_node['cited_as'],
'name': get_related_agent_name(relation_node),
'given_name': agent_node['given_name'],
'family_name': agent_node['family_name'],
'additional_name': agent_node['additional_name'],
Expand Down
28 changes: 28 additions & 0 deletions share/util/names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@

def build_name_from_parts(agent_node):
    """construct some name from parts, making wild cultural assumptions

    Joins the given name, additional name, family name and suffix (in that
    order) with single spaces. Parts that are falsy or contain only
    whitespace are skipped, and each remaining part is trimmed, so the
    result has no doubled or dangling spaces.

    @param agent_node: share.util.graph.MutableNode with concrete type 'abstractagent'
    @returns string (possibly empty)
    """
    part_keys = ('given_name', 'additional_name', 'family_name', 'suffix')
    # trim each part on its own: stripping only the joined string would leave
    # stray interior spaces behind whenever a part is blank or padded
    # (assumes every non-falsy part is a string)
    trimmed_parts = (
        part.strip()
        for part in (agent_node[key] for key in part_keys)
        if part
    )
    return ' '.join(part for part in trimmed_parts if part)


def get_related_agent_name(relation_node):
    """get the name to refer to a related agent

    Order of preference: the relation's `cited_as`, then the agent's `name`,
    then a name assembled from the agent's name parts. A relation that has
    neither a `cited_as` nor an agent yields an empty string.

    @param relation_node: share.util.graph.MutableNode with concrete type 'abstractagentworkrelation'
    @returns string (possibly empty)
    """
    cited_as = relation_node['cited_as']
    if cited_as:
        return cited_as

    agent_node = relation_node['agent']
    if agent_node is None:
        # nothing to name; honor the "possibly empty" contract rather than
        # failing on None['name']
        return ''

    return agent_node['name'] or build_name_from_parts(agent_node)
18 changes: 18 additions & 0 deletions tests/share/metadata_formats/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,24 @@
},
},
},
'no-names-only-name-parts': {
'suid_id': 7,
'source_name': 'SomeSource',
'normalized_datum_kwargs': {
'created_at': dateutil.parser.isoparse('2017-04-07T21:09:05.023090+00:00'),
'data': {
'@graph': [
{'@id': '_:cfed87cc7294471eac2b67d9ce92f60b', '@type': 'person', 'given_name': 'Suzanne', 'family_name': 'Simard', 'identifiers': [], 'related_agents': []},
{'@id': '_:c786ef414acb423f878522690453a6b8', '@type': 'creator', 'agent': {'@id': '_:cfed87cc7294471eac2b67d9ce92f60b', '@type': 'person'}, 'order_cited': 0, 'creative_work': {'@id': '_:de04f3a34eb047e98891662b5345afd9', '@type': 'creativework'}},
{'@id': '_:2afb5767c79c47c9ab6b87c7d5b3aa0a', '@type': 'person', 'given_name': 'Mary', 'family_name': 'Austi', 'identifiers': [], 'related_agents': []},
{'@id': '_:44ec4e74e8ae487cbd86abcde5c2a075', '@type': 'creator', 'agent': {'@id': '_:2afb5767c79c47c9ab6b87c7d5b3aa0a', '@type': 'person'}, 'order_cited': 1, 'creative_work': {'@id': '_:de04f3a34eb047e98891662b5345afd9', '@type': 'creativework'}},
{'@id': '_:8ae1b46cd2f341cb968fbf76c9a7f345', 'uri': 'http://dx.doi.org/10.5772/9813', '@type': 'workidentifier', 'creative_work': {'@id': '_:de04f3a34eb047e98891662b5345afd9', '@type': 'creativework'}},
{'@id': '_:de04f3a34eb047e98891662b5345afd9', 'tags': [], '@type': 'creativework', 'extra': {'type': 'book-chapter', 'member': 'http://id.crossref.org/member/3774', 'titles': ['The Role of Mycorrhizas in Forest Soil Stability with Climate Change'], 'date_created': '2012-03-29T07:53:20+00:00', 'date_published': {'date_parts': [[2010, 8, 17]]}, 'container_title': ['Climate Change and Variability'], 'published_online': {'date_parts': [[2010, 8, 17]]}}, 'title': 'The Role of Mycorrhizas in Forest Soil Stability with Climate Change', 'identifiers': [{'@id': '_:8ae1b46cd2f341cb968fbf76c9a7f345', '@type': 'workidentifier'}], 'date_updated': '2017-03-31T05:39:48+00:00', 'related_agents': [{'@id': '_:c786ef414acb423f878522690453a6b8', '@type': 'creator'}, {'@id': '_:44ec4e74e8ae487cbd86abcde5c2a075', '@type': 'creator'}, {'@id': '_:e0fdb4b7b6194b699078f26a799cd232', '@type': 'publisher'}]},
],
'@context': {}
},
},
},
'with-is_deleted': {
'suid_id': 57,
'source_name': 'foo',
Expand Down
17 changes: 16 additions & 1 deletion tests/share/metadata_formats/test_oai_dc_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,29 @@ def test_formatter(self, normalized_datum, expected_output):
xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"
>
<dc:title>The Role of Mycorrhizas in Forest Soil Stability with Climate Change</dc:title>
<dc:creator>Mary Austi</dc:creator>
<dc:creator>Suzanne Simard</dc:creator>
<dc:creator>Mary Austi</dc:creator>
<dc:publisher>InTech</dc:publisher>
<dc:date>2017-03-31T05:39:48Z</dc:date>
<dc:type>creativework</dc:type>
<dc:identifier>http://dx.doi.org/10.5772/9813</dc:identifier>
</oai_dc:dc>
''',
'no-names-only-name-parts': '''
<oai_dc:dc
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"
>
<dc:title>The Role of Mycorrhizas in Forest Soil Stability with Climate Change</dc:title>
<dc:creator>Suzanne Simard</dc:creator>
<dc:creator>Mary Austi</dc:creator>
<dc:date>2017-03-31T05:39:48Z</dc:date>
<dc:type>creativework</dc:type>
<dc:identifier>http://dx.doi.org/10.5772/9813</dc:identifier>
</oai_dc:dc>
''',
'with-is_deleted': None,
'with-subjects': '''
<oai_dc:dc
Expand Down
50 changes: 50 additions & 0 deletions tests/share/metadata_formats/test_sharev2_elastic_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,56 @@ def test_formatter(self, encode_mock, normalized_datum, expected_output):
],
},
},
'no-names-only-name-parts': {
'contributors': ['Suzanne Simard', 'Mary Austi'],
'date': '2017-03-31T05:39:48+00:00',
'date_created': '2017-04-07T21:09:05.023090+00:00',
'date_modified': '2017-04-07T21:09:05.023090+00:00',
'date_updated': '2017-03-31T05:39:48+00:00',
'id': 'encoded-7',
'identifiers': ['http://dx.doi.org/10.5772/9813'],
'publishers': [],
'retracted': False,
'sources': ['SomeSource'],
'title': 'The Role of Mycorrhizas in Forest Soil Stability with Climate Change',
'type': 'creative work',
'types': ['creative work'],
'affiliations': [],
'funders': [],
'hosts': [],
'subject_synonyms': [],
'subjects': [],
'tags': [],
'lists': {
'affiliations': [],
'contributors': [
{
'family_name': 'Simard',
'given_name': 'Suzanne',
'identifiers': [],
'name': 'Suzanne Simard',
'order_cited': 0,
'relation': 'creator',
'type': 'person',
'types': ['person', 'agent'],
},
{
'family_name': 'Austi',
'given_name': 'Mary',
'identifiers': [],
'name': 'Mary Austi',
'order_cited': 1,
'relation': 'creator',
'type': 'person',
'types': ['person', 'agent'],
},
],
'funders': [],
'hosts': [],
'lineage': [],
'publishers': [],
},
},
'with-is_deleted': {
'id': 'encoded-57',
'is_deleted': True,
Expand Down

0 comments on commit 2b498ac

Please sign in to comment.