Skip to content

Commit

Permalink
get person names from cited_as, if needed
Browse files (browse the repository at this point in the history)
  • Loading branch information
aaxelb committed Feb 23, 2021
1 parent 7f99554 commit b4e7142
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
12 changes: 12 additions & 0 deletions share/regulate/steps/normalize_agent_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,26 @@ def regulate_node(self, node):

def _normalize_person(self, node):
name = strip_whitespace(node['name'] or '')

if not name:
# try building the name from parts
name = strip_whitespace(' '.join((
node['given_name'] or '',
node['additional_name'] or '',
node['family_name'] or '',
node['suffix'] or '',
)))

if not name:
# try getting the name from "cited_as"
cited_as_names = [
relation['cited_as']
for relation in node['work_relations']
if relation['cited_as']
]
if len(cited_as_names) == 1:
name = cited_as_names[0]

if not name or self.NULL_RE.match(name):
self.info('Discarding unnamed person', node.id)
node.delete()
Expand Down
8 changes: 7 additions & 1 deletion tests/share/normalize/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,13 @@ def test_normalize_mixed_agent_relation(self, input, output, Graph, ExpectedGrap
([
Creator(cited_as='', agent=Person(id=0, name='None', identifiers=[AgentIdentifier(1, id=1)])),
], [
])
]),
# Nameless agent with cited_as
([
Creator(cited_as='Magpie', agent=Person(id=0, name='', identifiers=[AgentIdentifier(1, id=1)])),
], [
Creator(cited_as='Magpie', agent=Person(id=0, name='Magpie', identifiers=[AgentIdentifier(1, id=1)])),
]),
])
def test_normalize_contributor_creator_relation(self, input, output, Graph, ExpectedGraph):
graph = Graph(CreativeWork(agent_relations=input))
Expand Down

0 comments on commit b4e7142

Please sign in to comment.