-
Notifications
You must be signed in to change notification settings - Fork 7
/
ot-info-for-taxon-name.py
executable file
·213 lines (200 loc) · 11.9 KB
/
ot-info-for-taxon-name.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
#!/usr/bin/env python
"""Simple command-line tool that combines the actions
of the ot-tnrs-match-names.py, ot-taxon-info.py, and ot-taxon-subtree.py
scripts
"""
import pprint
import sys
def ot_tnrs_match_names(name_list,
                        context_name=None,
                        do_approximate_matching=True,
                        include_dubious=False,
                        include_deprecated=True,
                        tnrs_wrapper=None):
    """Run a TNRS match_names query for `name_list` and return the wrapped response.

    `name_list` is handed to the wrapper unchanged as the names to match.
    `context_name`, `do_approximate_matching`, `include_dubious` and
    `include_deprecated` are forwarded as keyword arguments of the same name.
    `tnrs_wrapper` is the object whose `match_names` method performs the query;
    when it is None, `peyotl.sugar.tnrs` is imported and used instead.

    The query is always issued with `wrap_response=True`, and whatever
    `match_names` returns is returned to the caller.

    No exception is caught here. (Per the original author's note, an
    unrecognized `context_name` is expected to surface from the wrapper as a
    ValueError.)
    """
    wrapper = tnrs_wrapper
    if wrapper is None:
        # Imported lazily so that callers supplying their own wrapper do not
        # need the default one.
        from peyotl.sugar import tnrs as wrapper
    query_options = {
        'context_name': context_name,
        'do_approximate_matching': do_approximate_matching,
        'include_deprecated': include_deprecated,
        'include_dubious': include_dubious,
        'wrap_response': True,
    }
    return wrapper.match_names(name_list, **query_options)
def fetch_and_write_taxon_info(id_list, include_anc, list_tips, output):
    """Fetch the taxon record for every OTT ID in `id_list` and write it to `output`.

    `include_anc` is sent to the service as `include_lineage` and is also
    passed on to `write_taxon_info`.
    `list_tips` is sent as `list_terminal_descendants`; it must currently
    compare equal to False (see the assertion below).
    `output` is the stream handed to `write_taxon_info` for each record.
    """
    from peyotl.sugar import taxonomy
    # @TEMP: listing tips stays disabled until
    # https://github.com/OpenTreeOfLife/taxomachine/issues/89 is fixed.
    assert list_tips == False
    request_options = {
        'include_lineage': include_anc,
        'list_terminal_descendants': list_tips,
        'wrap_response': True,
    }
    for ott_id in id_list:
        taxon_record = taxonomy.taxon(ott_id, **request_options)
        write_taxon_info(taxon_record, include_anc, output)
def write_taxon_info(taxon, include_anc, output):
    """Write a human-readable description of `taxon` to the `output` stream.

    `taxon` must expose the attributes `ott_id`, `name`, `synonyms`, `flags`,
    `rank` and `taxomachine_node_id`; `parent` is read only when
    `include_anc` is true.
    `include_anc` selects whether the chain of `parent` taxa is described as
    well. Each ancestor gets the same set of lines, preceded by a
    "is a child of" line, until a taxon whose `parent` is None is reached;
    that taxon is reported as the root of the taxonomy.
    `output` is any object with a `write(str)` method.

    Every line written, including the final root line, ends with a newline so
    that text written afterwards starts on a fresh line.
    """
    current = taxon
    # Walk up the lineage iteratively; this visits the same taxa, in the same
    # order, as recursing on `parent` would.
    while True:
        output.write('Taxon info for OTT ID (ot:ottId) = {}\n'.format(current.ott_id))
        output.write(' name (ot:ottTaxonName) = "{}"\n'.format(current.name))
        if current.synonyms:
            output.write(' known synonyms: "{}"\n'.format('", "'.join(current.synonyms)))
        else:
            output.write(' known synonyms: \n')
        output.write(' OTT flags for this taxon: {}\n'.format(current.flags))
        output.write(' The taxonomic rank associated with this name is: {}\n'.format(current.rank))
        output.write(
            ' The (unstable) node ID in the current taxomachine instance is: {}\n'.format(
                current.taxomachine_node_id))
        if not include_anc:
            return
        parent = current.parent
        if parent is None:
            # The trailing newline was missing here, which glued the next
            # piece of output onto this line.
            output.write('Taxon {c} is the root of the taxonomy.\n'.format(c=current.ott_id))
            return
        output.write('Taxon {c} is a child of {p}.\n'.format(c=current.ott_id, p=parent.ott_id))
        current = parent
def match_and_print(name_list, context_name, do_approximate_matching, include_dubious, include_deprecated,
                    include_subtree, output):
    """Run a match_names query and report the wrapped response on `output`.

    The query is made through `ot_tnrs_match_names` using `peyotl.sugar.tnrs`.
    Any exception from the query is re-raised as a RuntimeError. When the
    error text says the context name is not valid (and a `context_name` was
    supplied), the valid contexts are first fetched with `tnrs.contexts()` and
    pretty-printed to stderr as a hint.

    On success a summary of the response is written, followed by the details
    of every match for every name in `name_list`. For a name with exactly one
    match, the taxon info including ancestors is also written and, when
    `include_subtree` is true, the taxonomic subtree. For any other match
    count a notice is written to stderr instead.
    """
    from peyotl.sugar import tnrs
    try:
        # Perform the match_names call; the result is peyotl's wrapper around the response.
        result = ot_tnrs_match_names(name_list,
                                     context_name=context_name,
                                     do_approximate_matching=do_approximate_matching,
                                     include_dubious=include_dubious,
                                     include_deprecated=include_deprecated,
                                     tnrs_wrapper=tnrs)
    except Exception as x:
        error_text = str(x)
        if 'is not a valid context name' in error_text and context_name is not None:
            # Ask the tnrs/contexts service which context names would have been accepted.
            valid_contexts = tnrs.contexts()
            sys.stderr.write(
                'The valid context names are the strings in the values of the following "tnrs/contexts" dict:\n')
            pprint.PrettyPrinter(indent=4, stream=sys.stderr).pprint(valid_contexts)
        raise RuntimeError('ot-tnrs-match-names: exception raised. {}'.format(x))

    emit = output.write

    # Overall summary, read from the attributes of the wrapped response.
    emit('A v2/tnrs/match_names query was performed using: {} \n'.format(tnrs.endpoint))
    emit('The taxonomy being served by that server is:')
    emit(' {}'.format(result.taxonomy.source))
    emit(' by {}\n'.format(result.taxonomy.author))
    emit('Information for the taxonomy can be found at {}\n'.format(result.taxonomy.weburl))
    emit('{} out of {} queried name(s) were matched\n'.format(len(result.matched_name_ids), len(name_list)))
    emit('{} out of {} queried name(s) were unambiguously matched\n'.format(len(result.unambiguous_name_ids),
                                                                            len(name_list)))
    emit('The context_name for the matched names was "{}"'.format(result.context))
    if result.context_inferred:
        emit(' (this context was inferred based on the matches).\n')
    else:
        emit(' (this context was supplied as an argument to speed up the name matching).\n')
    emit('The name matching result(s) used approximate/fuzzy string matching? {}\n'.format(
        result.includes_approximate_matches))
    emit('The name matching result(s) included dubious names? {}\n'.format(result.includes_dubious_names))
    emit('The name matching result(s) included deprecated taxa? {}\n'.format(result.includes_deprecated_taxa))

    # (template, attribute) pairs written for each match, split around the synonyms line.
    leading_fields = (
        (' OTT ID (ot:ottId) = {}\n', 'ott_id'),
        (' name (ot:ottTaxonName) = "{}"\n', 'name'),
        (' query was matched using fuzzy/approximate string matching? {}\n', 'is_approximate_match'),
        (' match score = {}\n', 'score'),
        (' query name is a junior synonym of this match? {}\n', 'is_synonym'),
        (' is deprecated from OTT? {}\n', 'is_deprecated'),
        (' is dubious taxon? {}\n', 'is_dubious'),
    )
    trailing_fields = (
        (' OTT flags for this taxon: {}\n', 'flags'),
        (' The taxonomic rank associated with this name is: {}\n', 'rank'),
        (' The nomenclatural code for this name is: {}\n', 'nomenclature_code'),
        (' The (unstable) node ID in the current taxomachine instance is: {}\n', 'taxomachine_node_id'),
    )

    for name in name_list:
        match_tuple = result[name]
        emit('The query name "{}" produced {} result(s):\n'.format(name, len(match_tuple)))
        for position, candidate in enumerate(match_tuple):
            emit(' Match #{}\n'.format(position))
            for template, attribute in leading_fields:
                emit(template.format(getattr(candidate, attribute)))
            if candidate.synonyms:
                emit(' known synonyms: "{}"\n'.format('", "'.join(candidate.synonyms)))
            else:
                emit(' known synonyms: \n')
            for template, attribute in trailing_fields:
                emit(template.format(getattr(candidate, attribute)))

        if len(match_tuple) != 1:
            # NOTE(review): this branch also runs when there are zero matches,
            # although the notice says "Multiple matches found" - confirm intent.
            if include_subtree:
                sys.stderr.write(
                    '\nMultiple matches found - ancestor info and subtreesuppressed.\nSee ot-taxon-info.py and ot-taxon-subtree.py which can be called with an OTT ID\n')
            else:
                sys.stderr.write(
                    '\nMultiple matches found - ancestor info suppressed.\nSee ot-taxon-info.py which can be called with an OTT ID\n')
            continue

        sys.stderr.write('\nOnly one match found, so we will request the info on the ancestors, too...\n')
        ott_id = match_tuple[0].ott_id
        fetch_and_write_taxon_info(id_list=[ott_id], include_anc=True, list_tips=False, output=output)
        if include_subtree:
            from peyotl.sugar import taxonomy
            subtree = taxonomy.subtree(ott_id)['subtree']
            emit('The taxononmic subtree is:\n')
            emit(subtree)
            emit('\n')
def main(argv):
    """Parse the command-line arguments in `argv` and run `match_and_print`.

    `argv` is the list of arguments without the program name (for example
    `sys.argv[1:]`).

    When no names are given, a demonstration query for "Homo sapiens" and
    "Gorilla gorilla" is run and a notice is written to stderr. The `names`
    argument therefore accepts zero or more values; requiring at least one
    made the demonstration branch unreachable.

    When any supplied name starts with '-', the help text is printed once
    before the query proceeds. All results are written to `sys.stdout`.

    argparse raises SystemExit for `--help` and for invalid arguments.
    """
    import argparse
    # Every fragment ends with a space so that adjacent literals do not run
    # words together in the help text.
    description = ('Uses Open Tree of Life web services to try to find a taxon ID for each name supplied. '
                   'Using a --context-name=NAME to provide a limited taxonomic context and using the '
                   '--prohibit-fuzzy-matching option can make the matching faster. If only one match '
                   'is found, it also does the equivalent of the ot-taxon-info.py and '
                   'ot-taxon-subtree.py scripts.')
    parser = argparse.ArgumentParser(prog='ot-info-for-taxon-name', description=description)
    parser.add_argument('names',
                        nargs='*',
                        help='name(s) for which we will try to find OTT IDs '
                             '(a demonstration query is run if none are given)')
    parser.add_argument('--context-name',
                        default=None,
                        type=str,
                        required=False,
                        help='name of the taxonomic context used to limit the search')
    parser.add_argument('--include-dubious',
                        action='store_true',
                        default=False,
                        required=False,
                        help='return matches to taxa that are not included in the synthetic tree '
                             'because their taxonomic status is doubtful')
    parser.add_argument('--subtree',
                        action='store_true',
                        default=False,
                        required=False,
                        help='print the newick representation of the taxonomic subtree '
                             'if there is only one matching OTT ID')
    parser.add_argument('--include-deprecated',
                        action='store_true',
                        default=False,
                        required=False,
                        help='include deprecated taxa in the matches')
    parser.add_argument('--prohibit-fuzzy-matching',
                        action='store_true',
                        default=False,
                        required=False,
                        help='turn off approximate (fuzzy) name matching')
    args = parser.parse_args(argv)

    # The service takes do_approximate_matching; the command line exposes the
    # opposite so that every flag is a positive directive.
    do_approximate_matching = not args.prohibit_fuzzy_matching

    name_list = args.names
    if not name_list:
        name_list = ["Homo sapiens", "Gorilla gorilla"]
        sys.stderr.write('Running a demonstration query with {}\n'.format(name_list))
    elif any(name.startswith('-') for name in name_list):
        # A name that looks like an option is probably a mistyped flag.
        parser.print_help()

    match_and_print(name_list,
                    context_name=args.context_name,
                    do_approximate_matching=do_approximate_matching,
                    include_dubious=args.include_dubious,
                    include_deprecated=args.include_deprecated,
                    include_subtree=args.subtree,
                    output=sys.stdout)
if __name__ == '__main__':
    # Script entry point: run main() on the arguments after the program name.
    try:
        main(sys.argv[1:])
    except Exception as err:
        # Exit with the error text as the exit message instead of a traceback.
        raise SystemExit('{}\n'.format(str(err)))