/
annotationResolver.py
128 lines (110 loc) · 5.18 KB
/
annotationResolver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import bioservices
import pyparsing as pyp
# pyparsing grammar for a single `<name>...</name>` element. The opening and
# closing tags are suppressed, so the only token produced is the element text,
# which may consist of letters, digits, spaces, '-', '_' and '/'.
goGrammar = pyp.Suppress(pyp.Literal('<name>')) + pyp.Word(pyp.alphanums + ' -_/') + pyp.Suppress(pyp.Literal('</name>'))
from contextlib import contextmanager
import sys, os
@contextmanager
def suppress_stdout():
    """Context manager that silences ``sys.stdout`` and ``sys.stderr``.

    While the ``with`` body runs, both ``sys.stdout`` and ``sys.stderr`` are
    rebound to a handle opened on ``os.devnull``. The previous stream objects
    are put back when the body finishes, whether it returns normally or
    raises, and the devnull handle is closed afterwards.

    Only the Python-level ``sys.stdout`` / ``sys.stderr`` names are rebound;
    nothing is done at the file-descriptor level.
    """
    with open(os.devnull, "w") as devnull:
        # Remember the current streams so they can be restored exactly.
        old_stdout = sys.stdout
        old_stderr = sys.stderr
        sys.stdout = devnull
        sys.stderr = devnull
        try:
            yield
        finally:
            # Always restore, even if the managed body raised.
            sys.stdout = old_stdout
            sys.stderr = old_stderr
def resolveAnnotation(annotation):
    """Resolve *annotation* with console output silenced.

    Runs ``resolveAnnotationHelper(annotation)`` inside ``suppress_stdout()``
    and returns the helper's result unchanged.
    """
    with suppress_stdout():
        resolved = resolveAnnotationHelper(annotation)
    return resolved
def resolveAnnotationHelper(annotation):
    """Look up a display name for an annotation URI via bioservices.

    The service is chosen by substring match on *annotation* (GO, KEGG,
    UniProt, ChEBI, taxonomy). Results are memoised in ``resolveAnnotation.db``
    (keyed by the original *annotation* string), and the service clients are
    kept as attributes of ``resolveAnnotation`` so they are built only once.

    Args:
        annotation: annotation URI string, e.g.
            ``'http://identifiers.org/taxonomy/10116'``. A percent-encoded
            colon (``%3A``) in the identifier is decoded before querying.

    Returns:
        A tuple ``(annotation, name)``. ``name`` is ``''`` when the URI
        belongs to an unhandled or deliberately skipped namespace, when the
        service has no answer, or when the lookup raises ``IOError`` or
        ``KeyError``.
    """
    # Lazy one-time setup. The clients are created first and ``db`` last, so
    # that a failure while building a client does not leave a half-initialised
    # state behind: ``db`` is the marker checked above, and the next call will
    # simply retry the setup.
    if not hasattr(resolveAnnotation, 'db'):
        resolveAnnotation.ch = bioservices.ChEBI(verbose=False)
        resolveAnnotation.uni = bioservices.UniProt(verbose=False)
        resolveAnnotation.k = bioservices.kegg.KEGG(verbose=False)
        resolveAnnotation.qg = bioservices.QuickGO(verbose=False)
        resolveAnnotation.t = bioservices.Taxon()
        # Pre-seeded entries that bypass the remote lookup.
        # NOTE(review): these map URIs to URIs (not names), and P07006 maps to
        # the P06842 URI -- kept exactly as in the original; confirm intent.
        resolveAnnotation.db = {
            'http://identifiers.org/uniprot/P62988': 'http://identifiers.org/uniprot/P62988',
            'http://identifiers.org/uniprot/P06842': 'http://identifiers.org/uniprot/P06842',
            'http://identifiers.org/uniprot/P07006': 'http://identifiers.org/uniprot/P06842',
        }

    cache = resolveAnnotation.db
    if annotation in cache:
        return annotation, cache[annotation]

    # Identifier = last path component, with an encoded colon decoded.
    # (Previously the decoded string was overwritten by a second assignment
    # from the raw annotation, so '%3A' was never actually decoded.)
    identifier = annotation.replace('%3A', ':').split('/')[-1]

    try:
        if 'obo.go' in annotation or '/go/GO' in annotation:
            res = resolveAnnotation.qg.Term(identifier)
            names = []
            # An int result is treated as "no usable payload", as before.
            if not isinstance(res, int):
                res = bioservices.Service('name').easyXML(res)
                for tag in res.findAll('name'):
                    try:
                        tagString = str(goGrammar.parseString(str(tag))[0])
                    except pyp.ParseBaseException:
                        # Tag text does not fit the simple name grammar.
                        continue
                    if tagString != 'Systematic synonym':
                        names.append(tagString)
            # First accepted name wins; '' is cached when nothing matched.
            finalAnnotation = names[0] if names else ''
            cache[annotation] = finalAnnotation
        elif 'kegg' in annotation:
            data = resolveAnnotation.k.get(identifier)
            dict_data = resolveAnnotation.k.parse(data)
            # An int from parse() is treated as a failed lookup.
            if isinstance(dict_data, int):
                finalAnnotation = ''
            else:
                finalAnnotation = dict_data['name']
            cache[annotation] = finalAnnotation
        elif 'uniprot' in annotation:
            result = resolveAnnotation.uni.quick_search(identifier)
            if identifier in result:
                # Keep only the text before the first '('.
                finalAnnotation = result[identifier]['Protein names'].split('(')[0]
            else:
                # Cache the miss explicitly, consistent with the GO and KEGG
                # branches, instead of relying on a KeyError from the cache.
                finalAnnotation = ''
            cache[annotation] = finalAnnotation
        elif 'chebi' in annotation:
            entry = resolveAnnotation.ch.getLiteEntity(identifier)
            finalAnnotation = ''
            # If several entities come back, the last one wins (as before).
            for element in entry:
                finalAnnotation = str(element['chebiAsciiName'])
            cache[annotation] = finalAnnotation
        elif 'cco' in annotation or 'pirsf' in annotation or 'pubchem' in annotation or 'omim' in annotation:
            # These namespaces are recognised but not resolved (and not cached).
            finalAnnotation = ''
        elif 'taxonomy' in annotation:
            result = resolveAnnotation.t.search_by_taxon(identifier)
            finalAnnotation = result['Scientific Name']
            cache[annotation] = finalAnnotation
        else:
            # Unknown namespace.
            return annotation, ''
    except (IOError, KeyError):
        # Best-effort lookup: I/O failures and missing keys yield an empty
        # name rather than an exception; nothing is cached in that case.
        return annotation, ''
    return annotation, finalAnnotation
if __name__ == "__main__":
    # Manual smoke test: resolve one taxonomy annotation and print the
    # (annotation, name) tuple. The call form of print with a single argument
    # gives the same output on Python 2 and also parses on Python 3.
    print(resolveAnnotation('http://identifiers.org/taxonomy/10116'))
    # print(resolveAnnotation('http://identifiers.org/uniprot/P01133'))