-
Notifications
You must be signed in to change notification settings - Fork 10
/
zooma.py
164 lines (132 loc) · 6.35 KB
/
zooma.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
from enum import Enum
from functools import total_ordering
import logging
from cmat.trait_mapping.ols import get_ontology_label_from_ols, is_current_and_in_ontology, is_in_ontology
from cmat.trait_mapping.utils import json_request
logger = logging.getLogger(__package__)
@total_ordering
class ZoomaConfidence(Enum):
"""Enum to represent the confidence of a mapping in Zooma."""
LOW = 1
MEDIUM = 2
GOOD = 3
HIGH = 4
def __eq__(self, other):
if not isinstance(other, type(self)):
return False
return self.value == other.value
def __lt__(self, other):
return self.value < other.value
def __str__(self):
return self.name
@total_ordering
class ZoomaMapping:
"""Representation of one ontology term in a mapping in Zooma."""
def __init__(self, uri, confidence, source):
self.uri = uri
self.confidence = ZoomaConfidence[confidence.upper()]
self.source = source
self.ontology_label = ""
self.in_ontology = False
# For non-EFO mappings, `is_current` property does not make sense and it not used
self.is_current = False
def __eq__(self, other):
if not isinstance(other, type(self)):
return False
if (self.uri != other.uri or self.confidence != other.confidence
or self.ontology_label != other.ontology_label or self.in_ontology != other.in_ontology
or self.is_current != other.is_current):
return False
return True
def __lt__(self, other):
return ((self.confidence, self.in_ontology, self.is_current) <
(other.confidence, other.in_ontology, other.is_current))
class ZoomaResult:
"""
A mapping in Zooma from one term, which can contain multiple ontology IDs mapped to. One
term can be mapped to multiple mappings.
"""
def __init__(self, uri_list, zooma_label, confidence, source):
self.uri_list = uri_list
self.zooma_label = zooma_label
self.confidence = confidence
self.source = source
self.mapping_list = []
for uri in uri_list:
self.mapping_list.append(ZoomaMapping(uri, confidence, source))
def __str__(self):
return "{}, {}, {}, {}".format(self.zooma_label, self.confidence, self.source,
self.mapping_list)
def __eq__(self, other):
if not isinstance(other, type(self)):
return False
return (self.uri_list == other.uri_list, self.zooma_label == other.zooma_label,
self.confidence == other.confidence, self.source == other.source,
self.mapping_list == other.mapping_list)
def get_zooma_results(trait_name: str, filters: dict, zooma_host: str, target_ontology: str = 'EFO') -> list:
"""
Given a trait name, Zooma filters in a dict and a hostname to use, query Zooma and return a list
of Zooma mappings for this trait.
First get the URI, label from a selected source, confidence and source:
http://snarf.ebi.ac.uk:8580/spot/zooma/v2/api/services/annotate?propertyValue=intellectual+disability
Then the ontology label to replace the label from a source:
https://www.ebi.ac.uk/ols/api/terms?iri=http%3A%2F%2Fwww.ebi.ac.uk%2Fefo%2FEFO_0003847
:param trait_name: A string containing a trait name from a ClinVar record.
:param filters: A dictionary containing filters used when querying OxO
:param zooma_host: Hostname of a Zooma instance to query.
:param target_ontology: ID of target ontology (default EFO)
:return: List of ZoomaResults
"""
url = build_zooma_query(trait_name, filters, zooma_host)
zooma_response_list = json_request(url)
if zooma_response_list is None:
return []
zooma_result_list = get_zooma_results_for_trait(zooma_response_list)
for zooma_result in zooma_result_list:
for zooma_mapping in zooma_result.mapping_list:
label = get_ontology_label_from_ols(zooma_mapping.uri)
if label is not None:
zooma_mapping.ontology_label = label
else:
# If no label is returned (because OLS failed to provide it), keep the existing one from ZOOMA
zooma_mapping.ontology_label = zooma_result.zooma_label
uri_is_current_and_in_ontology = is_current_and_in_ontology(zooma_mapping.uri, target_ontology)
if not uri_is_current_and_in_ontology:
uri_is_in_ontology = is_in_ontology(zooma_mapping.uri, target_ontology)
zooma_mapping.in_ontology = uri_is_in_ontology
else:
zooma_mapping.in_ontology = uri_is_current_and_in_ontology
zooma_mapping.is_current = uri_is_current_and_in_ontology
return zooma_result_list
def build_zooma_query(trait_name: str, filters: dict, zooma_host: str) -> str:
"""
Given a trait name, filters and hostname, create a url with which to query Zooma. Return this
url.
:param trait_name: A string containing a trait name from a ClinVar record.
:param filters: A dictionary containing filters used when querying OxO
:param zooma_host: Hostname of a Zooma instance to query.
:return: String of a url which can be requested
"""
url = "{}/spot/zooma/v2/api/services/annotate?propertyValue={}".format(zooma_host, trait_name)
url_filters = [
"required:[{}]".format(filters["required"]),
"ontologies:[{}]".format(filters["ontologies"]),
"preferred:[{}]".format(filters["preferred"])
]
url += "&filter={}".format(",".join(url_filters))
return url
def get_zooma_results_for_trait(zooma_response_list: list) -> list:
"""
Given a response from a Zooma request return ZoomaResults based upon the data in that request.
:param zooma_response_list: A json (dict) response from a Zooma request.
:return: List of ZoomaResulst in the Zooma response.
"""
result_list = []
for response in zooma_response_list:
# uri_list = ",".join(result["semanticTags"])
uris = response["semanticTags"]
zooma_label = response["annotatedProperty"]["propertyValue"]
confidence = response["confidence"]
source_name = response["derivedFrom"]["provenance"]["source"]["name"]
result_list.append(ZoomaResult(uris, zooma_label, confidence, source_name))
return result_list