Skip to content

Commit

Permalink
get ncbi id using api.datanator.info
Browse files Browse the repository at this point in the history
  • Loading branch information
lzy7071 committed Jan 30, 2020
1 parent 6cf9395 commit bd79319
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 5 deletions.
22 changes: 20 additions & 2 deletions datanator/data_source/kegg_org_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,24 @@ def get_ncbi_id(self, name):
return int(id_list.group(1))
else:
return int(str(result).split(': ')[1])

def get_ncbi_id_rest(self, name):
    """Get ncbi taxonomy id of an organism using
    api.datanator.info

    The organism name is passed as a URL-encoded query parameter, so
    names containing reserved characters (``&``, ``#``, ``+``, ``=``)
    reach the service intact instead of corrupting the query string.

    Args:
        name (:obj:`str`): Name of the organism.

    Return:
        (:obj:`int`): ``tax_id`` of the first hit listed under
            ``taxon_tree`` in the response (expected to be the NCBI
            Taxonomy ID), or ``None`` when the response has no hits.
    """
    endpoint = "https://api.datanator.info/ftx/text_search/num_of_index/"
    # Let requests build and escape the query string; a list value is
    # serialized as a repeated key (fields=tax_name&fields=name_txt).
    params = {
        'query_message': name,
        'index': 'taxon_tree',
        'from_': 0,
        'size': 5,
        'fields': ['tax_name', 'name_txt'],
    }
    r = requests.get(endpoint, params=params)
    data = json.loads(r.text)
    # Truthiness covers a missing key, an empty list and a null value.
    hits = data.get('taxon_tree')
    if not hits:
        return None
    return hits[0]['tax_id']

def has_href_and_id(self, tag):
    """Tell whether ``tag`` has both an ``href`` and an ``id`` attribute.

    Args:
        tag: Object exposing ``has_attr(name)`` (presumably a
            BeautifulSoup tag -- confirm against callers).

    Return:
        The result of ``tag.has_attr('href') and tag.has_attr('id')``.
    """
    href_present = tag.has_attr('href')
    # Without an href the id check is never made, same as the `and` form.
    if not href_present:
        return href_present
    return tag.has_attr('id')
Expand Down Expand Up @@ -110,7 +128,7 @@ def make_bulk(self, offset=0, bulk_size=100):
if i < offset:
continue
if count < bulk_size:
ncbi_id = self.get_ncbi_id(name.split(' (')[0])
ncbi_id = self.get_ncbi_id_rest(name)
result.append({"kegg_organism_id": _id, "org_name": name,
'ncbi_taxonomy_id': ncbi_id})
count += 1
Expand Down Expand Up @@ -173,7 +191,7 @@ def bulk_load(self, bulk_size=100):
bulk_size(:obj:`int`): number of entries per insertion. Defaults to 100.
"""
length = bulk_size
count = 0
count = 16
while length != 0:
if count == self.max_entries:
break
Expand Down
11 changes: 8 additions & 3 deletions tests/data_source/test_kegg_org_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def setUpClass(cls):
@classmethod
def tearDownClass(cls):
shutil.rmtree(cls.cache_dirname)
cls.src.db.drop_collection(self.src.collection_str)
cls.src.db.drop_collection(cls.src.collection_str)

@unittest.skip('passed')
def test_parse_ids(self):
Expand Down Expand Up @@ -94,11 +94,16 @@ def test_matching_species_name_id(self):
result = {name: _id}
self.assertEqual(result, {'Candidatus Bathyarchaeota archaeon BA2': 1700836})

# @unittest.skip('passed')
@unittest.skip('passed')
def test_get_ncbi_id(self):
    # Pairs of (organism name, expected NCBI taxonomy id); None is the
    # expected result for a name that matches nothing.
    # 'Pan troglodytes' is listed twice, so that lookup is repeated.
    expectations = (
        ('Ornithobacterium rhinotracheale ORT-UMN 88', 1401325),
        ('latoieruwerwe', None),
        ('Pan troglodytes', 9598),
        ('Pan troglodytes', 9598),
    )
    for name, expected_id in expectations:
        self.assertEqual(self.src.get_ncbi_id(name), expected_id)

@unittest.skip('passed')
def test_get_ncbi_id_rest(self):
    # The name keeps its parenthesised common-name suffix when looked up
    # through the REST endpoint.
    organism = "homo sapiens (human)"
    taxon_id = self.src.get_ncbi_id_rest(organism)
    self.assertEqual(taxon_id, 9606)

0 comments on commit bd79319

Please sign in to comment.