In [2]:
# List the working directory so the input files used by the cells below are visible.
!ls

eBird_phylo.ipynb   mcd2.ods		 merced_birdlist.txt  mercedlist.ods
ebird_taxonomy.txt  merced_birdlist.csv  merced_ebird.json


In [20]:
import sys
import json
from opentree import OT

In [21]:
#text scraped from https://ebird.org/region/US-CA-047?yr=all&m=&rank=mrec&hs_sortBy=date&hs_o=desc
# Read every line of the scraped list into memory. The context manager closes
# the file as soon as the lines are loaded instead of leaking the handle.
with open("merced_birdlist.csv") as birdlist_file:
    fi = birdlist_file.readlines()

In [23]:
# Build a mapping from species name to the id returned by
# OT.get_ottid_from_name. The species name is taken from the last
# comma-separated field of each line; each distinct name is looked up once.
bird_dict = {}
for row in fi:
    spp_name = row.split(',')[-1].strip()
    if spp_name in bird_dict:
        # Already resolved on an earlier line; skip the repeat lookup.
        continue
    bird_dict[spp_name] = OT.get_ottid_from_name(spp_name)

In [24]:
# Number of distinct species names collected from the bird list.
len(bird_dict)

302

In [25]:
# Collect the unique ids from bird_dict, dropping the None placeholder if any
# lookup produced one (discard is a no-op when None is absent).
ott_ids = set(bird_dict.values())
ott_ids.discard(None)


In [213]:
def remove_problem_characters(instr, prob_char = "():#", replace_w = '?'):
    """Return `instr` with every character listed in `prob_char` replaced by `replace_w`.

    Parameters:
        instr: the string to clean.
        prob_char: string whose individual characters are each treated as a
            problem character.
        replace_w: replacement text substituted for each problem character
            (may be empty to delete them).

    Returns:
        A new string; `instr` itself is not modified.

    The substitution is done in a single pass with str.translate, so the
    result does not depend on iteration order and replacement text is never
    re-scanned, even when `replace_w` itself contains a problem character.
    """
    translation = {ord(char): replace_w for char in prob_char}
    return instr.translate(translation)

In [214]:
# Request the induced synthetic tree for the collected ids, labelled by name.
output = OT.synth_induced_tree(ott_ids=list(ott_ids),  label_format='name')

# Some of the returned names still cause issues, so pass every label through
# remove_problem_characters: first the taxon labels, then the node labels.
for taxon in output.tree.taxon_namespace:
    taxon.label = remove_problem_characters(taxon.label)

for node in output.tree:
    if not node.label:
        # Nothing to clean on unlabelled nodes.
        continue
    node.label = remove_problem_characters(node.label)

In [215]:
# Save the tree held in `output` as a Newick file and report the output path.
trefile = "MercedBirds.tre"
output.tree.write(schema = "newick", path = trefile)
message = "Tree written to {}\n".format(trefile)
sys.stdout.write(message)


Tree written to MercedBirds.tre


## Don't forget to cite your friendly phylogeneticists!
# Gather the studies supporting the synthetic tree and save their citations.
studies = output.response_dict['supporting_studies']
cites = OT.get_citations(studies) #this can be a bit slow
# Use a context manager so the file is closed even if the write fails, and a
# dedicated handle name so `fi` (the bird-list lines read earlier) is not
# clobbered. The citations contain non-ASCII author names, so the encoding is
# pinned rather than left to the platform default.
with open("merced_ebird_cites.txt", "w", encoding="utf-8") as cites_file:
    cites_file.write(cites)

In [25]:
# Report how many entries are in `studies` (the tree's supporting studies).
print("{} published studies went into this tree".format(len(studies)))

105 published studies went into this tree


In [19]:
# Show the full citation text returned by OT.get_citations.
print(cites)

https://tree.opentreeoflife.org/curator/study/view/ot_873?tab=trees&tree=tree1
Blechschmidt, Karin, Hans-Ulrich Peter, Jacobus De Korte, Michael Wink, Ingrid Seibold, Andreas J. Helbig. 1993. Investigations on the molecular systematics of skuas (Stercorariidae). Zool. Jb. Syst. 120: 379-387.
http://www.uni-heidelberg.de/institute/fak14/ipmb/phazb/pubwink/1993/1.%201993.pdf

https://tree.opentreeoflife.org/curator/study/view/ot_816?tab=trees&tree=tree1
Gibson, Rosemary, Allan Baker. 2012. Multiple gene sequences resolve phylogenetic relationships in the shorebird suborder Scolopaci (Aves: Charadriiformes). Molecular Phylogenetics and Evolution 64 (1): 66-72
http://dx.doi.org/10.1016/j.ympev.2012.03.008

https://tree.opentreeoflife.org/curator/study/view/ot_425?tab=trees&tree=tree1
Stein, R. Will, Joseph W. Brown, Arne Ø. Mooers. 2015. A molecular genetic time scale demonstrates Cretaceous origins and multiple diversification rate shifts within the order Galliformes (Aves). Molecular Phy

In [29]:
# From git@github.com:OpenTreeOfLife/ChronoSynth.git
# Parse the node-age JSON; the context manager closes the file after loading
# instead of leaving the handle open.
with open("../../ChronoSynth/node_ages.json") as node_ages_file:
    dates = json.load(node_ages_file)

In [179]:
# Same induced-tree request as for `output`, but with label_format='id'; the
# resulting node labels are matched against the keys of dates['node_ages'] below.
idoutput = OT.synth_induced_tree(ott_ids=list(ott_ids),  label_format='id')


In [180]:
# For every labelled node of idoutput.tree, look its label up in
# dates['node_ages']. Dated nodes get a "<label>\t<mean age>" row in the
# `ages` file and their label replaced by that mean age; undated nodes get
# the label "-". The lookup keys are recorded in dated_nodes / undated_nodes.
#
# NOTE: this rewrites node labels in place (dated ones become floats), so the
# cell cannot be re-run without first re-creating `idoutput`.
dated_nodes = set()
undated_nodes = set()
# The context manager guarantees the `ages` file is flushed and closed before
# any later cell or external tool reads it.
with open("ages", 'w') as ages:
    for node in idoutput.tree:
        if not node.label:
            continue
        if node.label.startswith(('mrca', 'ott')):
            lab = node.label
        else:
            # Otherwise use the last whitespace-separated token as the key.
            lab = node.label.split()[-1]
        if lab in dates['node_ages']:
            dated_nodes.add(lab)
            # Sorted before summing so the floating-point mean is unchanged
            # from the previous implementation.
            age_range = sorted(float(source['age']) for source in dates['node_ages'][lab])
            age_est = sum(age_range) / len(age_range)
            ages.write("{}\t{}\n".format(node.label, age_est))
            node.label = age_est
        else:
            node.label = "-"
            undated_nodes.add(lab)

# Display how many distinct node keys had at least one age record.
len(dated_nodes)

991

In [181]:
trefile = "MercedBirds_ID_annot.tre"
# Write idoutput.tree (whose node labels were replaced by mean ages or "-" in
# the previous cell) to a Newick file.
# NOTE(review): the final cell of this notebook writes the relabelled dated
# tree to this same path, overwriting this file — confirm that is intended.
idoutput.tree.write(path = trefile, schema = "newick")
            

In [161]:
# Number of distinct node keys with no entry in dates['node_ages'].
len(undated_nodes)

225

In [205]:
# Run phylocom's bladj on MercedBirds_ID.tre and redirect its stdout to blad.tre.
# NOTE(review): no cell visible in this notebook writes MercedBirds_ID.tre (the
# tree-writing cells use MercedBirds_ID_annot.tre) — confirm where this input
# comes from so the notebook works from a fresh kernel.
# NOTE(review): bladj presumably reads the `ages` file written earlier from the
# working directory — confirm against the phylocom manual.
! phylocom bladj -f MercedBirds_ID.tre > blad.tre

In [186]:
import os
# Prerequisite: download and extract the OTT 3.2 taxonomy from
# https://tree.opentreeoflife.org/about/taxonomy-version/ott3.2
taxonomy_file = "../../ott3.2/taxonomy.tsv"
# Stop here with an AssertionError if the taxonomy file is not at the expected path.
assert os.path.exists(taxonomy_file)

In [189]:
# Build a lookup from the first tab-separated field of each taxonomy line to
# the fifth one (presumably OTT uid -> taxon name, with the literal "|"
# separators occupying the odd fields — TODO confirm against the file layout).
tax_dict = {}
# The context manager closes the taxonomy file once it has been read instead
# of leaving the handle open.
with open(taxonomy_file) as taxonomy:
    for lin in taxonomy:
        lii = lin.split('\t')
        tax_dict[lii[0]] = lii[4]

In [210]:
import dendropy
# Load the Newick tree that the phylocom bladj cell redirected into blad.tre.
dated = dendropy.Tree.get(path="blad.tre", schema='newick')

In [211]:
def _display_label(label, blank_mrca):
    """Return the label to display for one taxon or node of the dated tree.

    Parameters:
        label: the current label string.
        blank_mrca: when true, a label starting with 'mrca' is reduced to just
            the dated marker; when false such labels are returned unchanged.

    Returns:
        For an 'ott...' label, the tax_dict entry for the text after the 'ott'
        prefix, followed by "*" if the original label is in dated_nodes.
        For an 'mrca...' label (with blank_mrca), "*" or "". Any other label
        is returned as is.

    Raises:
        KeyError: if the id after 'ott' is not a key of tax_dict.
    """
    dat = "*" if label in dated_nodes else ""
    if label.startswith('ott'):
        # Slice off the literal prefix; str.strip('ott') would strip a set of
        # characters from both ends rather than a prefix.
        return tax_dict[label[len('ott'):]] + dat
    # Only the ORIGINAL label decides the 'mrca' case: a label that was just
    # replaced by a taxon name above is never re-examined.
    if blank_mrca and label.startswith('mrca'):
        return dat
    return label


# Taxon labels: translate 'ott' ids to names; other labels are left alone.
for tax in dated.taxon_namespace:
    tax.label = _display_label(tax.label, blank_mrca=False)

# Node labels: translate 'ott' ids to names and collapse 'mrca' labels to the
# dated marker; unlabelled nodes are skipped.
for node in dated:
    if node.label:
        node.label = _display_label(node.label, blank_mrca=True)
            
        

In [212]:
trefile = "MercedBirds_ID_annot.tre"
# Write the relabelled `dated` tree to a Newick file.
# NOTE(review): an earlier cell writes idoutput.tree to this same path, so this
# call overwrites that file — confirm that is intended.
dated.write(path = trefile, schema = "newick")