Skip to content

Commit

Permalink
fix bulk TNRS sources
Browse files: browse the repository at this point in the history
  • Loading branch information
snacktavish committed Aug 10, 2019
1 parent 707018f commit 2c7dc69
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 5 deletions.
7 changes: 4 additions & 3 deletions physcraper/opentree_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,10 @@ def bulk_tnrs_load(filename, ids_obj = None):
otu_dict[otu]["^ot:ottTaxonName"] = name["ottTaxonName"]
if name.get("ottId"):
otu_dict[otu]["^ot:ottId"] = name["ottId"]
for source in name.get("taxonomicSources"):
taxsrc = source.split(":")
otu_dict[otu]["^{}:taxon".format(taxsrc[0])] = source.strip(taxsrc[1])
for source in name.get("taxonomicSources", []):
if source:
taxsrc = source.split(":")
otu_dict[otu]["^{}:taxon".format(taxsrc[0])] = taxsrc[1]
for otu in otu_dict:
otu_dict[otu]["^physcraper:status"] = "original"
otu_dict[otu]["^physcraper:last_blasted"] = None
Expand Down
2 changes: 1 addition & 1 deletion physcraper/scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -738,7 +738,7 @@ def filter_seqs(self, tmp_dict, threshold=5, type="random"):
aln_sp_d = self.make_sp_dict(aln_otus)
debug("There are {} taxa in aln".format(len(aln_sp_d)))
alltax = set(new_sp_d.keys()).union(aln_sp_d.keys())
sys.stdout.write("{} taxa in orginal alignment; {} taxa in updated alignemnt {}, keeping max {} seq per taxon".format(len(aln_sp_d), len(alltax)), threshold)
sys.stdout.write("taxa in orginal alignment; {} taxa in updated alignemnt {}, keeping max {} seq per taxon".format(len(aln_sp_d), len(alltax), threshold))
for tax_id in new_sp_d:
debug(" {} new seqs for taxon {}".format(len(new_sp_d[tax_id]), tax_id))
tax_otus = []
Expand Down
2 changes: 1 addition & 1 deletion tests/test_treetaxon.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


json_file = "tests/data/treetaxon/main.json"
exp_otu_dict = {u'Otuname11': {'^ncbi:taxon': u'ncbi:', '^ot:ottTaxonName': u'Phrynops', '^irmng:taxon': u'irmng:', '^gbif:taxon': u'gbif:', '^ot:ottId': 66456, '^ot:originalLabel': u'phrynops'}, u'Otuname10': {'^ncbi:taxon': u'ncbi:', '^ot:ottTaxonName': u'Emys orbicularis', '^irmng:taxon': u'irmng:', '^gbif:taxon': u'gbif:', '^ot:ottId': 733093, '^ot:originalLabel': u'emys_orbicularis'}, u'Otuname13': {'^ncbi:taxon': u'ncbi:', '^ot:ottTaxonName': u'Caretta', '^irmng:taxon': u'irmng:', '^worms:taxon': u'worms:', '^gbif:taxon': u'gbif:', '^ot:ottId': 66463, '^ot:originalLabel': u'caretta'}, u'Otuname12': {'^ncbi:taxon': u'ncbi:', '^ot:ottTaxonName': u'Caiman', '^irmng:taxon': u'irmng:', '^gbif:taxon': u'gbif:', '^ot:ottId': 335589, '^ot:originalLabel': u'caiman'}, u'Otuname15': {'^ncbi:taxon': u'ncbi:', '^ot:ottId': 284917, '^ot:originalLabel': u'chelonoidis_nigra', '^gbif:taxon': u'gbif:', '^ot:ottTaxonName': u'Chelonoidis nigra'}, u'Otuname14': {'^ncbi:taxon': u'ncbi:', '^ot:ottTaxonName': u'Python', '^irmng:taxon': u'irmng:', '^gbif:taxon': u'gbif:', '^ot:ottId': 675102, '^ot:originalLabel': u'python'}, u'Otuname16': {'^ncbi:taxon': u'ncbi:', '^ot:ottTaxonName': u'Podarcis', '^irmng:taxon': u'irmng:', '^gbif:taxon': u'gbif:', '^ot:ottId': 937560, '^ot:originalLabel': u'podarcis'}, u'Otuname9': {'^ncbi:taxon': u'ncbi:', '^ot:ottTaxonName': u'Alligator', '^irmng:taxon': u'irmng:', '^gbif:taxon': u'gbif:', '^ot:ottId': 335593, '^ot:originalLabel': u'alligator'}, u'Otuname8': {'^ncbi:taxon': u'ncbi:', '^irmng:taxon': u'irmng:', '^ot:ottId': 465090, '^ot:originalLabel': u'Xenopus', '^ot:ottTaxonName': u'Xenopus (genus in Deuterostomia)'}, u'Otuname1': {'^ncbi:taxon': u'ncbi:', '^ot:ottTaxonName': u'Protopterus', '^irmng:taxon': u'irmng:', '^gbif:taxon': u'gbif:', '^ot:ottId': 199350, '^ot:originalLabel': u'protopterus'}, u'Otuname3': {'^ncbi:taxon': u'ncbi:', '^ot:ottTaxonName': u'Gallus', '^irmng:taxon': u'irmng:', '^gbif:taxon': u'gbif:', '^ot:ottId': 153562, 
'^ot:originalLabel': u'Gallus'}, u'Otuname2': {'^ncbi:taxon': u'ncbi:', '^ot:ottTaxonName': u'Anolis', '^irmng:taxon': u'irmng:', '^gbif:taxon': u'gbif:', '^ot:ottId': 705358, '^ot:originalLabel': u'Anolis'}, u'Otuname5': {'^ncbi:taxon': u'ncbi:', '^ot:ottTaxonName': u'Monodelphis', '^irmng:taxon': u'irmng:', '^gbif:taxon': u'gbif:', '^ot:ottId': 122359, '^ot:originalLabel': u'Monodelphis'}, u'Otuname4': {'^ncbi:taxon': u'ncbi:', '^ot:ottTaxonName': u'Homo', '^irmng:taxon': u'irmng:', '^gbif:taxon': u'gbif:', '^ot:ottId': 770309, '^ot:originalLabel': u'Homo'}, u'Otuname7': {'^ncbi:taxon': u'ncbi:', '^ot:ottTaxonName': u'Taeniopygia', '^irmng:taxon': u'irmng:', '^gbif:taxon': u'gbif:', '^ot:ottId': 708325, '^ot:originalLabel': u'Taeniopygia'}, u'Otuname6': {'^ncbi:taxon': u'ncbi:', '^ot:ottTaxonName': u'Ornithorhynchus', '^irmng:taxon': u'irmng:', '^gbif:taxon': u'gbif:', '^ot:ottId': 962391, '^ot:originalLabel': u'Ornithorhynchus'}}
exp_otu_dict = {u'Otuname11': {'^ncbi:taxon': u'8462', '^ot:ottTaxonName': u'Phrynops', '^irmng:taxon': u'1201383', '^gbif:taxon': u'2442114', '^physcraper:status': 'original', '^ot:ottId': 66456, '^ot:originalLabel': u'phrynops', '^physcraper:last_blasted': None}, u'Otuname10': {'^ncbi:taxon': u'82168', '^ot:ottTaxonName': u'Emys orbicularis', '^irmng:taxon': u'11010173', '^gbif:taxon': u'5220538', '^physcraper:status': 'original', '^ot:ottId': 733093, '^ot:originalLabel': u'emys_orbicularis', '^physcraper:last_blasted': None}, u'Otuname13': {'^ncbi:taxon': u'8466', '^ot:ottTaxonName': u'Caretta', '^irmng:taxon': u'1324374', '^worms:taxon': u'137066', '^gbif:taxon': u'2442177', '^physcraper:status': 'original', '^ot:ottId': 66463, '^ot:originalLabel': u'caretta', '^physcraper:last_blasted': None}, u'Otuname12': {'^ncbi:taxon': u'8497', '^ot:ottTaxonName': u'Caiman', '^irmng:taxon': u'1010136', '^gbif:taxon': u'5220195', '^physcraper:status': 'original', '^ot:ottId': 335589, '^ot:originalLabel': u'caiman', '^physcraper:last_blasted': None}, u'Otuname15': {'^ncbi:taxon': u'66189', '^ot:ottTaxonName': u'Chelonoidis nigra', '^gbif:taxon': u'5220266', '^physcraper:status': 'original', '^ot:ottId': 284917, '^ot:originalLabel': u'chelonoidis_nigra', '^physcraper:last_blasted': None}, u'Otuname14': {'^ncbi:taxon': u'37579', '^ot:ottTaxonName': u'Python', '^irmng:taxon': u'1031494', '^gbif:taxon': u'2454645', '^physcraper:status': 'original', '^ot:ottId': 675102, '^ot:originalLabel': u'python', '^physcraper:last_blasted': None}, u'Otuname16': {'^ncbi:taxon': u'42163', '^ot:ottTaxonName': u'Podarcis', '^irmng:taxon': u'1304163', '^gbif:taxon': u'2468993', '^physcraper:status': 'original', '^ot:ottId': 937560, '^ot:originalLabel': u'podarcis', '^physcraper:last_blasted': None}, u'Otuname9': {'^ncbi:taxon': u'8495', '^ot:ottTaxonName': u'Alligator', '^irmng:taxon': u'1039645', '^gbif:taxon': u'2441367', '^physcraper:status': 'original', '^ot:ottId': 335593, 
'^ot:originalLabel': u'alligator', '^physcraper:last_blasted': None}, u'Otuname8': {'^ncbi:taxon': u'8353', '^ot:ottTaxonName': u'Xenopus (genus in Deuterostomia)', '^irmng:taxon': u'1382944', '^physcraper:status': 'original', '^ot:ottId': 465090, '^ot:originalLabel': u'Xenopus', '^physcraper:last_blasted': None}, u'Otuname1': {'^ncbi:taxon': u'7885', '^ot:ottTaxonName': u'Protopterus', '^irmng:taxon': u'1295830', '^gbif:taxon': u'2441252', '^physcraper:status': 'original', '^ot:ottId': 199350, '^ot:originalLabel': u'protopterus', '^physcraper:last_blasted': None}, u'Otuname3': {'^ncbi:taxon': u'9030', '^ot:ottTaxonName': u'Gallus', '^irmng:taxon': u'1278118', '^gbif:taxon': u'2473720', '^physcraper:status': 'original', '^ot:ottId': 153562, '^ot:originalLabel': u'Gallus', '^physcraper:last_blasted': None}, u'Otuname2': {'^ncbi:taxon': u'28376', '^ot:ottTaxonName': u'Anolis', '^irmng:taxon': u'1301983', '^gbif:taxon': u'2468081', '^physcraper:status': 'original', '^ot:ottId': 705358, '^ot:originalLabel': u'Anolis', '^physcraper:last_blasted': None}, u'Otuname5': {'^ncbi:taxon': u'13615', '^ot:ottTaxonName': u'Monodelphis', '^irmng:taxon': u'1325350', '^gbif:taxon': u'7967492', '^physcraper:status': 'original', '^ot:ottId': 122359, '^ot:originalLabel': u'Monodelphis', '^physcraper:last_blasted': None}, u'Otuname4': {'^ncbi:taxon': u'9605', '^ot:ottTaxonName': u'Homo', '^irmng:taxon': u'1035772', '^gbif:taxon': u'2436435', '^physcraper:status': 'original', '^ot:ottId': 770309, '^ot:originalLabel': u'Homo', '^physcraper:last_blasted': None}, u'Otuname7': {'^ncbi:taxon': u'59728', '^ot:ottTaxonName': u'Taeniopygia', '^irmng:taxon': u'1265687', '^gbif:taxon': u'2493632', '^physcraper:status': 'original', '^ot:ottId': 708325, '^ot:originalLabel': u'Taeniopygia', '^physcraper:last_blasted': None}, u'Otuname6': {'^ncbi:taxon': u'9257', '^ot:ottTaxonName': u'Ornithorhynchus', '^irmng:taxon': u'1107086', '^gbif:taxon': u'2433375', '^physcraper:status': 'original', 
'^ot:ottId': 962391, '^ot:originalLabel': u'Ornithorhynchus', '^physcraper:last_blasted': None}}
treefile = "tests/data/treetaxon/turtle.fa.1.treefile"
treeout = "tests/data/tmp/ottid.tre"
expected_tree = "tests/data/treetaxon/ottid.tre"
Expand Down

0 comments on commit 2c7dc69

Please sign in to comment.