@@ -64,6 +64,35 @@ def get_meta_by_id(self, _id):
64
64
result .append (doc )
65
65
return result
66
66
67
def get_ortho_by_id(self, _id):
    '''
    Look up a single protein document by its uniprot id.

    Args:
        _id (:obj:`str`): uniprot id.

    Returns:
        (:obj:`list` of :obj:`dict`): one-element list holding the matching
        document (without mongo's ``_id`` field) when a document is found.
        When nothing matches, a single placeholder :obj:`dict` (not wrapped
        in a list) filled with 'None' / 99999999 sentinel values is returned.
    '''
    record = self.collection.find_one(filter={'uniprot_id': _id},
                                      projection={"_id": 0})
    if record is not None:
        # Round-trip through json with ignore_nan=True; presumably this
        # turns NaN values into nulls so the result is serializable
        # (relies on the module-level json supporting ignore_nan).
        return [json.loads(json.dumps(record, ignore_nan=True))]

    # No match: hand back a placeholder record with sentinel values.
    placeholder = {}
    placeholder['uniprot_id'] = 'None'
    placeholder['entry_name'] = 'None'
    placeholder['gene_name'] = 'None'
    placeholder['protein_name'] = 'None'
    placeholder['canonical_sequence'] = 'None'
    placeholder['length'] = 99999999
    placeholder['mass'] = '99999999'
    placeholder['abundances'] = []
    placeholder['ncbi_taxonomy_id'] = 99999999
    placeholder['species_name'] = '99999999'
    return placeholder
95
+
67
96
def get_meta_by_name_taxon (self , name , taxon_id ):
68
97
'''
69
98
Get protein's metadata given protein name
@@ -869,4 +898,90 @@ def get_all_kegg(self, ko, anchor, max_distance):
869
898
species_canon_ancestor = obj [species + '_canon_ancestors' ]
870
899
doc ['canon_ancestors' ] = species_canon_ancestor
871
900
result [distance - 1 ]['documents' ].append (doc )
901
+ return result
902
+
903
def get_all_ortho(self, ko, anchor, max_distance):
    '''Get replacement abundance values by taxonomic distance
    among proteins with the same OrthoDB group number.

    Args:
        ko (:obj:`str`): OrthoDB group id to query for.
        anchor (:obj:`str`): anchor species' name.
        max_distance (:obj:`int`): max taxonomic distance from origin protein allowed for
            proteins in results.

    Returns:
        (:obj:`list` of :obj:`dict`): list of result proteins and their info
        [
        {'distance': 1, 'documents': [{}, {}, {} ...]},
        {'distance': 2, 'documents': [{}, {}, {} ...]}, ...].
        When ``max_distance`` is not positive, a message string is
        returned instead of a list.
    '''
    if max_distance <= 0:
        return 'Please use get_abundance_by_id to check self abundance values'

    # One bucket per distance 1..max_distance; bucket i sits at index i - 1.
    result = [{'distance': i + 1, 'documents': []} for i in range(max_distance)]

    projection = {
        'orthodb_id': 1,
        'orthodb_name': 1,
        'ancestor_name': 1,
        'ncbi_taxonomy_id': 1,
        'abundances': 1,
        'species_name': 1,
        'uniprot_id': 1,
        '_id': 0,
        'ancestor_taxon_id': 1,
        'protein_name': 1,
        'gene_name': 1,
        'modifications': 1
    }
    # Only documents in the requested group that carry abundance data.
    query = {'$and': [{'orthodb_id': ko}, {'abundances': {'$exists': True}}]}
    docs = self.collection.find(filter=query, projection=projection)

    # Cache of taxon id -> species name for documents lacking 'species_name'.
    # The cache is keyed on the taxon id so each taxon is looked up in
    # 'taxon_tree' at most once (previously the membership test used the
    # missing species name, so the cache was never hit).
    names = {}
    for doc in docs:
        # Round-trip through json with ignore_nan=True; presumably this
        # turns NaN values into nulls (relies on the module-level json
        # supporting ignore_nan).
        doc = json.loads(json.dumps(doc, ignore_nan=True))
        species = doc.get('species_name')
        if species is None:
            taxon_id = doc['ncbi_taxonomy_id']
            if taxon_id not in names:
                names[taxon_id] = self.db_obj['taxon_tree'].find_one(
                    {"tax_id": taxon_id})['tax_name']
            species = names[taxon_id]
        obj = self.taxon_manager.get_canon_common_ancestor_fast(
            anchor, species, org_format='tax_name')
        distance = obj[anchor]
        # -1 is excluded; it presumably marks "no common ancestor found".
        # NOTE(review): a distance of 0 would be stored in the last bucket
        # (index -1) -- confirm whether the helper can return 0.
        if distance != -1 and distance <= max_distance:
            doc['canon_ancestors'] = obj[species + '_canon_ancestors']
            result[distance - 1]['documents'].append(doc)
    return result
964
+
965
def get_info_by_orthodb(self, orthodb):
    '''
    Find all proteins that share the given OrthoDB group id.

    Args:
        orthodb(:obj:`str`): OrthoDB group id; it is lower-cased before
            being used in the query.

    Returns:
        (:obj:`list` of :obj:`dict`): one-element list of the form
        [{'orthodb_id': ... 'uniprot_ids': [], 'orthodb_name': ...}].
        'orthodb_name' is taken from the last matching document
        (['no name'] when that document lacks the field) and is absent
        when no document matches.
    '''
    group_id = orthodb.lower()
    summary = {'orthodb_id': group_id, 'uniprot_ids': []}
    wanted_fields = {'uniprot_id': 1, '_id': 0, 'orthodb_name': 1, 'orthodb_id': 1}
    cursor = self.collection.find(filter={'orthodb_id': group_id},
                                  projection=wanted_fields)

    uniprot_ids = summary['uniprot_ids']
    for record in cursor:
        # Overwritten on every record, so the last record's name wins.
        summary['orthodb_name'] = record.get('orthodb_name', ['no name'])
        uniprot_ids.append(record.get('uniprot_id'))
    return [summary]
0 commit comments