## To install jupyter in a virtual environment, see:
 http://opentreeoflife.github.io/jupyter-venv


In [1]:
import dendropy
from opentree import OT

## Custom synth API
https://github.com/OpenTreeOfLife/ws_wrapper/blob/synth-on-demand/synth-on-demand.md

Use curl to run custom synthesis on a collection or list of collections

In [2]:
OT.get_ottid_from_name("Aves")

81461

In [3]:
!curl -X POST https://ot38.opentreeoflife.org/v3/tree_of_life/build_tree -d '{"input_collection":["snacktavish/woodpeckers","josephwb/aves", "opentreeoflife/metazoa", "opentreeoflife/safe-microbes", "opentreeoflife/default", "snacktavish/ebirdtaxonomy"], "root_id": "ott81461"}'



{"opentree_home": "/home/otcetera/custom_synth_repos", "ott_dir": "/home/otcetera/custom_synth_repos/ott3.2-extinct-flagged", "root_ott_id": "81461", "synth_id": "multi_snacktavish_woodpeckers_81461_tmp1cxnx8d1", "collections": "snacktavish/woodpeckers,josephwb/aves,opentreeoflife/metazoa,opentreeoflife/safe-microbes,opentreeoflife/default,snacktavish/ebirdtaxonomy", "cleaning_flags": "major_rank_conflict,major_rank_conflict_inherited,environmental,viral,barren,not_otu,hidden,was_container,inconsistent,hybrid,merged", "additional_regrafting_flags": "extinct_inherited,extinct", "queue_order": 19, "status": "QUEUED"}

Check to see custom synth run has completed

In [4]:
!curl -X GET https://ot38.opentreeoflife.org/v3/tree_of_life/list_custom_built_trees | jq


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 14060  100 14060    0     0  34716      0 --:--:-- --:--:-- --:--:--     0--:--:-- 34801
[1;39m{
  [0m[34;1m"snacktavish_ebirdtaxonomy_1020138_tmpz3b44bqn"[0m[1;39m: [0m[1;39m{
    [0m[34;1m"opentree_home"[0m[1;39m: [0m[0;32m"/home/otcetera/custom_synth_repos"[0m[1;39m,
    [0m[34;1m"ott_dir"[0m[1;39m: [0m[0;32m"/home/otcetera/custom_synth_repos/ott3.2-extinct-flagged"[0m[1;39m,
    [0m[34;1m"root_ott_id"[0m[1;39m: [0m[0;32m"1020138"[0m[1;39m,
    [0m[34;1m"synth_id"[0m[1;39m: [0m[0;32m"snacktavish_ebirdtaxonomy_1020138_tmpz3b44bqn"[0m[1;39m,
    [0m[34;1m"collections"[0m[1;39m: [0m[0;32m"snacktavish/ebirdtaxonomy"[0m[1;39m,
    [0m[34;1m"cleaning_flags"[0m[1;39m: [0m[0;32m"major_rank_conflict,major_rank_conflict_inherited,environmental,viral,barren,not_otu,hidden,was_contai

Download the outputs of synthesis form the URL for your run shown in "download_url", and unzip

In [5]:
#!curl -X GET https://ot38.opentreeoflife.org/v3/tree_of_life/custom_built_tree/multi_snacktavish_woodpeckers_81461_tmpuw9n86mh.tar.gz --output custom_synth.tar.gz


In [6]:
#!tar -xzvf custom_synth.tar.gz

In [7]:
treefile = "multi_snacktavish_woodpeckers_81461_tmpq4e6csvr/labelled_supertree/labelled_supertree.tre"
custom_synth_tree = dendropy.Tree.get_from_path(treefile, schema = 'newick')

In [8]:
import json
# From git@github.com:OpenTreeOfLife/ChronoSynth.git
# this file is generated by running examples/conflict_test_all.py
dates = json.load(open("../node_ages1.json"))

In [9]:
dates.keys()

dict_keys(['metadata', 'node_ages'])

In [10]:
dates['metadata']

{'date': '2021-04-14',
 'synth_tree_about': {'date_created': '2019-12-23 11:41:23',
  'filtered_flags': ['major_rank_conflict',
   'major_rank_conflict_inherited',
   'environmental',
   'viral',
   'barren',
   'not_otu',
   'hidden',
   'was_container',
   'inconsistent',
   'hybrid',
   'merged'],
  'num_source_studies': 1162,
  'num_source_trees': 1216,
  'root': {'node_id': 'ott93302',
   'num_tips': 2391916,
   'taxon': {'name': 'cellular organisms',
    'ott_id': 93302,
    'rank': 'no rank',
    'tax_sources': ['ncbi:131567'],
    'unique_name': 'cellular organisms'}},
  'synth_id': 'opentree12.3',
  'taxonomy_version': '3.2draft9'}}

# Dictionary has an entry for each node in the synth tree
which we have dates for

e.g.
synth node:
https://tree.opentreeoflife.org/opentree/argus/opentree12.3@mrcaott10058ott10061  

input chronogram node
https://tree.opentreeoflife.org/curator/study/view/ot_2008?tab=home&tree=tree5&node=node2747




In [11]:
dates['node_ages']['mrcaott10058ott10061']

[{'age': 10.613932,
  'source_id': 'ot_2008@tree5',
  'source_node': 'node2747',
  'time_unit': 'Myr'},
 {'age': 10.613932,
  'source_id': 'ot_2008@tree3',
  'source_node': 'node1523',
  'time_unit': 'Myr'},
 {'age': 13.458842,
  'source_id': 'ot_307@tree2',
  'source_node': 'node10328',
  'time_unit': 'Myr'},
 {'age': 6.26,
  'source_id': 'ot_409@tree2',
  'source_node': 'node75387',
  'time_unit': 'Myr'},
 {'age': 7.636994,
  'source_id': 'ot_1177@tree1',
  'source_node': 'node3589',
  'time_unit': 'Myr'},
 {'age': 6.683522,
  'source_id': 'ot_1041@tree1',
  'source_node': 'node3612',
  'time_unit': 'Myr'}]

In [12]:
## WARNING! It looks like there may be some errors in some of the dates!

#e.g.
# ott886696

# ott896272
# 
ott_id = 'ott886696'
print(ott_id)
for source in dates['node_ages'][ott_id]:
    print(source)

ott_id = 'ott896272'   
print(ott_id)
for source in dates['node_ages'][ott_id]:
    print(source)


ott886696
{'age': 0.04223641137553541, 'source_id': 'ot_188@Tr44937', 'source_node': 'Tn7164523', 'time_unit': 'Myr'}
{'age': 55.40955610300376, 'source_id': 'ot_188@Tr44941', 'source_node': 'Tn7164933', 'time_unit': 'Myr'}
{'age': 0.06205883894405972, 'source_id': 'ot_188@Tr44935', 'source_node': 'Tn7164165', 'time_unit': 'Myr'}
ott896272
{'age': 241.0, 'source_id': 'ot_409@tree2', 'source_node': 'node114003', 'time_unit': 'Myr'}
{'age': 0.0, 'source_id': 'pg_1339@tree2668', 'source_node': 'node591157', 'time_unit': 'Myr'}


## Exercise: How could you potentially find and flag or fix problematic dates?

## Write out ages file for BLADJ

In [13]:
#Blad demands a date for the root, and will seg fault if it doesn't get one. 
#We can check to see if there already is one

dates['node_ages']['ott81461']

KeyError: 'ott81461'

In [None]:
#I picked this one form google, and am adding it to the dictionary
dates['node_ages']['ott81461'] = {'age': 60,
  'source_id': 'google',
  'source_node': None,
  'time_unit': 'Myr'},

We will write out input files for BLADJ.
http://phylodiversity.net/bladj/

This program is VERY finicky. If it appears not to be running, try directly on teh command line to see if you can get mroe helpful error messages.


## Exercise: Function for bladj inputs
Make this into a chronosynth function, that takes a tree and a dates dictionary, 
 and returns the ages file, input tree and citations for the dates!


In [None]:
##TODO make this into a chronosynth function, that takes a tree and a dates dictionary, 
# and returns the ages file, input tree and citations for the dates!

ages = open("ages",'w')

dated_nodes = set()
undated_nodes = set()
for node in custom_synth_tree:
    lab = None
    if node.label:
        if node.label.startswith('mrca'):
            lab = node.label
        elif node.label.startswith('ott'):
            lab = node.label
        else:
            lab = node.label.split()[-1]
        if lab in dates['node_ages']:
            dated_nodes.add(lab)
            age_range = [float(source['age']) for source in dates['node_ages'][lab]]
            age_est = sum(age_range) / len(age_range)
            # This uses the average age across multiple age estimates.
            ages.write("{}\t{}\n".format(node.label, age_est))
        else:
            undated_nodes.add(lab)


len(dated_nodes)

In [None]:
custom_synth_tree.write(path = "bladj_input.tre", schema = "newick")


In [None]:
! phylocom bladj -f bladj_input.tre > blad.tre

In [None]:
dated = dendropy.Tree.get(path="blad.tre", schema='newick')