## Getting an evolutionary tree for data in a region using GBIF and OpenTree of Life
### Example from the University of California, Merced Vernal Pools Reserve
https://vernalpools.ucmerced.edu/

More info and context at https://github.com/McTavishLab/biodiversity_next/blob/master/biodiversity_next.pdf



### Get occurence data from GBIF
Set polygon: https://www.gbif.org/occurrence/search?country=US&has_coordinate=true&has_geospatial_issue=false&taxon_key=1&advanced=1&geometry=POLYGON((-120.45565%2037.35309,-120.36587%2037.35309,-120.36587%2037.44063,-120.45565%2037.44063,-120.45565%2037.35309))

Download records.




In [4]:
import sys
from opentree import OT

In [5]:
input_gbif_file = "example.csv"

In [9]:
fi = open(input_gbif_file)
header = fi.readline().split('\t')


#Get indexes for each column in the csv file
col_dict = {}
for i, col in enumerate(header):
    col_dict[col] = i


sys.stdout.write("Matching ids\n")

match_dict = {}
gbif_ids = []
ott_ids = []
i = 0
#Looop through each line in the file
for lin in fi:
    i += 1
    sys.stdout.write(".") #progress bar
    sys.stdout.flush()
    lii = lin.split('\t')
    gb_id = lii[col_dict['taxonKey']]
    if gb_id in match_dict:
        #Skip gb_id's you have already matched
        pass
    else:
        # Do a direct match to gbif id's in the open tree taxnomy
        gbiftax = "gbif:{}".format(int(gb_id))
        res = OT.taxon_info(source_id=gbiftax)
        if res.status_code == 200:
            ott_id = int(res.response_dict['ott_id'])
            match_dict[gb_id] = ott_id
        if res.status_code == 400:
            # If GBIF id isn' found in the open tree taxonomy, search on scientific name
            spp_name = lii[col_dict['verbatimScientificName']]
            sys.stdout.write("{},{} not matched on ID\n".format(gbiftax, spp_name))
            res2 = OT.tnrs_match([spp_name])
            if res2.status_code == 200:
                if len(res2.response_dict['results']) > 0:
                    ott_id = int(res2.response_dict['results'][0]['matches'][0]['taxon']['ott_id'])
                    match_dict[gb_id] = ott_id
                    ott_ids.append(ott_id)
                    sys.stdout.write("{},{} matched on name to ott id{}\n".format(gbiftax, spp_name, ott_id))
                else:
                    sys.stdout.write("{},{} still NO MATCH\n".format(gbiftax, spp_name))
                    match_dict[gb_id] = None
        ott_ids.append(ott_id)


Matching ids
........................gbif:5229155,Pelecanus erythrorhynchos not matched on ID
gbif:5229155,Pelecanus erythrorhynchos matched on name to ott id316989
....gbif:9088491,Dryobates nuttallii not matched on ID
gbif:9088491,Dryobates nuttallii matched on name to ott id701703
..gbif:5231677,Mimus polyglottos not matched on ID
gbif:5231677,Mimus polyglottos matched on name to ott id571310
......................................gbif:2498167,Anser caerulescens not matched on ID
gbif:2498167,Anser caerulescens matched on name to ott id190878
...........gbif:2498161,Anser rossii not matched on ID
gbif:2498161,Anser rossii matched on name to ott id767830
....................

In [11]:
ott_ids = set(ott_ids)
if None in ott_ids:
    ott_ids.remove(None)

trefile = "names.tre"
#Get the synthetic tree from OpenTree and write out the citations to a text file.
output = OT.synth_induced_tree(ott_ids=list(ott_ids),  label_format='name')
output.tree.write(path = "names.tre", schema = "newick")
sys.stdout.write("Tree written to {}\n".format(trefile))


Tree written to names.tre


In [12]:
studies = output.response_dict['supporting_studies']

for study in studies:
    studyid = study.split('@')[0]
    studyres = OT.find_studies(studyid, search_property = 'study_id')
    
    new_cite = studyres.response_dict.get('matched_studies')
    if new_cite:
        f.write(to_string(new_cite[0].get('ot:studyPublicationReference', '')) + '\n' + new_cite[0].get('ot:studyPublication', '') + '\n')

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [18]:
studyres.__dict__

{'_request_url': 'https://api.opentreeoflife.org/v3/studies/find_studies',
 '_request_headers': {'content-type': 'application/json',
  'accept': 'application/json'},
 '_request_http_method': 'POST',
 '_request_data': {'property': 'study_id',
  'value': 'ot_425',
  'exact': False,
  'verbose': False},
 '_response_obj': <Response [400]>,
 '_response_dict': None,
 '_tree': None,
 '_to_object_converter': <opentree.object_conversion.DendropyConvert at 0x7f717dd810b8>}

In [20]:
OT.find_studies(studyid, search_property = 'study_id').__dict__

{'_request_url': 'https://api.opentreeoflife.org/v3/studies/find_studies',
 '_request_headers': {'content-type': 'application/json',
  'accept': 'application/json'},
 '_request_http_method': 'POST',
 '_request_data': {'property': 'study_id',
  'value': 'ot_425',
  'exact': False,
  'verbose': False},
 '_response_obj': <Response [400]>,
 '_response_dict': None,
 '_tree': None,
 '_to_object_converter': <opentree.object_conversion.DendropyConvert at 0x7f717dd810b8>}