Skip to content

Commit

Permalink
Merge pull request #91 from NatLibFi/issue81-preflabel-uniqueness-deterministic
Browse files Browse the repository at this point in the history

Make prefLabel policies deterministic by using label string sort order to break ties
  • Loading branch information
osma committed Aug 31, 2021
2 parents 71fd48b + 95324ea commit 202ad1e
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 1 deletion.
2 changes: 1 addition & 1 deletion skosify/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def preflabel_uniqueness(rdf, policy='all'):
return

def key_fn(label):
    """Build the sort key used to rank candidate labels.

    The key holds the value each selected policy function gives for
    ``label``, in the order the policies are listed, followed by the
    label's string form. That trailing string breaks ties between labels
    the policies rank equally, so the outcome is deterministic.
    """
    return [policy_fn[p](label) for p in policies] + [str(label)]

for res in sorted(resources):
prefLabels = {}
Expand Down
32 changes: 32 additions & 0 deletions test/test_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,35 @@ def test_preflabel_uniqueness_shortest_uppercase():
assert (a, SKOS.altLabel, Literal('short', 'en')) in rdf
assert (a, SKOS.altLabel, Literal('longer', 'en')) in rdf
assert (a, SKOS.altLabel, Literal('Longer', 'en')) in rdf


def test_preflabel_uniqueness_is_deterministic():
    """Check that ties between equally short prefLabels resolve predictably.

    Every label within a language has the same length, so the 'shortest'
    policy cannot choose between them on its own. The test expects the
    label that sorts first as a string to stay a prefLabel and the others
    to turn into altLabels, with the total triple count unchanged.
    """
    rdf = Graph()
    concept = BNode()
    rdf.add((concept, RDF.type, SKOS.Concept))

    # (language, label expected to be kept, labels in insertion order);
    # all labels of one language have the same length, tie must be broken
    cases = (
        ('en', 'aab', ('bab', 'bba', 'aab', 'aba')),
        ('fi', 'aa', ('ba', 'bb', 'aa', 'ab')),
    )

    for lang, _kept, texts in cases:
        for text in texts:
            rdf.add((concept, SKOS.prefLabel, Literal(text, lang)))

    len_before = len(rdf)

    skosify.check.preflabel_uniqueness(rdf, policy=['shortest'])

    # the graph must hold exactly as many triples as before
    assert len(rdf) == len_before

    for lang, kept, texts in cases:
        for text in texts:
            if text == kept:
                expected_property = SKOS.prefLabel
            else:
                expected_property = SKOS.altLabel
            assert (concept, expected_property, Literal(text, lang)) in rdf

0 comments on commit 202ad1e

Please sign in to comment.