/
util.py
72 lines (57 loc) · 2.34 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""Utility functions for Annif"""
import glob
import os
import os.path
import tempfile
import numpy as np
from annif import logger
from annif.suggestion import VectorSuggestionResult
def atomic_save(obj, dirname, filename, method=None):
    """Save an object under dirname/filename via a temporary file.

    The object is first written to a uniquely named temporary file created
    in dirname. Once writing has succeeded, the temporary file, and any
    sibling files the save routine created whose names start with the
    temporary file name, are renamed to their final names, replacing any
    existing files.

    obj -- the object to save; must have a .save(path) method unless
        method is given
    dirname -- directory in which both the temporary and final files live
    filename -- final file name (without directory)
    method -- optional callable invoked as method(obj, path) instead of
        obj.save(path)

    If saving raises, the leftover temporary files are removed (best
    effort) and the exception is re-raised.
    """
    prefix, suffix = os.path.splitext(filename)
    tempfd, tempfilename = tempfile.mkstemp(
        prefix=prefix, suffix=suffix, dir=dirname)
    os.close(tempfd)
    # Escape the path so that glob metacharacters ('[', '*', '?') in the
    # directory name are matched literally rather than treated as patterns.
    temp_pattern = glob.escape(tempfilename) + '*'
    logger.debug('saving %s to temporary file %s', str(obj), tempfilename)
    try:
        if method is not None:
            method(obj, tempfilename)
        else:
            obj.save(tempfilename)
    except BaseException:
        # Do not leave half-written temporary files behind on failure.
        for leftover in glob.glob(temp_pattern):
            try:
                os.remove(leftover)
            except OSError:
                pass  # best-effort cleanup; the original error matters more
        raise

    temp_base = os.path.basename(tempfilename)
    for fn in glob.glob(temp_pattern):
        # Anything after the temporary base name (e.g. an extra extension
        # added by the save routine) is carried over to the final name.
        extra = os.path.basename(fn)[len(temp_base):]
        newname = os.path.join(dirname, filename + extra)
        logger.debug('renaming temporary file %s to %s', fn, newname)
        # os.replace overwrites an existing destination on every platform,
        # whereas os.rename fails on Windows if the target already exists.
        os.replace(fn, newname)
def cleanup_uri(uri):
    """Strip one pair of enclosing angle brackets from a URI.

    A URI of the form '<...>' is returned without its first and last
    character; any other string is returned unchanged.
    """
    is_bracketed = uri.startswith('<') and uri.endswith('>')
    return uri[1:-1] if is_bracketed else uri
def merge_hits(weighted_hits, subject_index):
    """Combine suggestions from several sources into a single result.

    weighted_hits -- a sequence of objects that each provide a .weight and
        a .hits.vector attribute
    subject_index -- passed through unchanged to the result object

    The score vectors are averaged element-wise across the sources, using
    the weights, and the averaged vector is wrapped in a
    VectorSuggestionResult together with subject_index.
    """
    source_weights = [hit.weight for hit in weighted_hits]
    score_vectors = [hit.hits.vector for hit in weighted_hits]
    return VectorSuggestionResult(
        np.average(score_vectors, axis=0, weights=source_weights),
        subject_index)
def parse_sources(sourcedef):
    """Parse a source definition string into (src_id, weight) tuples.

    sourcedef -- comma-separated entries of the form 'src_id' or
        'src_id:weight', e.g. 'src1:1.0,src2'

    Returns a list of (src_id, weight) tuples in input order. Entries
    without an explicit weight get the weight 1.0. Whitespace around
    entries, source ids and weights is ignored. Raises ValueError if a
    weight cannot be converted to a float.
    """
    sources = []
    for srcdef in sourcedef.strip().split(','):
        parts = srcdef.strip().split(':')
        # Strip the id on its own so that 'src1 : 0.5' yields 'src1',
        # not 'src1 ' (float() already tolerates padded weights).
        src_id = parts[0].strip()
        weight = float(parts[1]) if len(parts) > 1 else 1.0
        sources.append((src_id, weight))
    return sources
def boolean(val):
    """Interpret the given value as a boolean.

    The value is converted to a string and lower-cased; the result is True
    only if that string is one of '1', 'yes', 'true' or 'on'. Every other
    value yields False.
    """
    normalized = str(val).lower()
    return normalized in {'1', 'yes', 'true', 'on'}