In [2]:
import autograd.numpy as np
from autograd.scipy.special import logsumexp,erf
from autograd.scipy.stats import norm
from autograd import grad

#from autograd_gamma import gamma, gammainc, gammaincc, gammaincln, gammainccln

import matplotlib.pyplot as plt
from copy import deepcopy
from itertools import product
from itertools import combinations
import pickle

from io import StringIO
from Bio import Phylo

from tree import Tree
from optimizer import Optimizer

import warnings
warnings.filterwarnings('ignore')

np.random.seed(0)

In [3]:
pop_size = 5.0 # exponential parameter for constant pop size prior
data_file = '../dat/DS1.pickle'

# Nucleotide / IUPAC ambiguity code -> indicator vector over (A, G, C, T).
# Defined in this configuration cell because the 2-taxa and 3-taxa test cells
# below read `nuc2vec` before the DS1 cell defines it; without this definition
# a fresh "Restart Kernel -> Run All" fails with NameError. The DS1 cell
# re-defines the identical table, which is harmless.
# From https://github.com/zcrabbit/vbpi-torch/blob/ff86cf0c47a5753f5cc5b4dfe0b6ed783ab22669/unrooted/phyloModel.py#L7-L11
nuc2vec = {'A':[1.,0.,0.,0.], 'G':[0.,1.,0.,0.], 'C':[0.,0.,1.,0.], 'T':[0.,0.,0.,1.],
           '-':[1.,1.,1.,1.], '?':[1.,1.,1.,1.], 'N':[1.,1.,1.,1.], 'R':[1.,1.,0.,0.],
           'Y':[0.,0.,1.,1.], 'S':[0.,1.,1.,0.], 'W':[1.,0.,0.,1.], 'K':[0.,1.,0.,1.],
           'M':[1.,0.,1.,0.], 'B':[0.,1.,1.,1.], 'D':[1.,1.,0.,1.], 'H':[1.,0.,1.,1.],
           'V':[1.,1.,1.,0.], '.':[1.,1.,1.,1.], 'U':[0.,0.,0.,1.]}

# Test the likelihood for 2 taxa

In [7]:
# Scratch check: natural log of 4 (presumably tied to the four nucleotide states — TODO confirm, or delete this cell).
np.log(4)

1.3862943611198906

In [12]:
# theta for the two-taxon case: an all-zero (2, 2, 2) array handed to Tree.
theta = np.zeros((2, 2, 2))

# Two identical single-site genomes (VERY SIMPLE on purpose).
genomes = [["A"], ["A"]]

# Encode every site through nuc2vec, then switch to log space in one step;
# zero entries become -inf.
tree_log_probs = np.log(
    np.array(
        [[nuc2vec[site] for site in genome] for genome in genomes],
        dtype=float,
    )
)

# The single non-zero entry sits below the diagonal and is the time from the
# last tree of DS1_two_taxa; every other entry is log(0) = -inf.
log_times0 = np.log(
    np.array([
        [0, 0],
        [0.3009806104147859, 0],
    ])
)

tree = Tree(theta, log_times0, tree_log_probs, pop_size=10.0)

print(tree.log_like)  # from BEAST: -1.920530823838002
print(tree.log_prior) # from BEAST: -2.3326831540355246

-1.9205308243321546
-2.332683154035524


# Test the likelihood for 3 taxa

In [16]:
# theta for the three-taxon case: an all-zero (2, 3, 3) array handed to Tree.
theta = np.zeros((2, 3, 3))

# Three two-site genomes; only the last one differs, at its second site.
genomes = [
    ["A", "A"],
    ["A", "A"],
    ["A", "T"],
]

# Encode every site through nuc2vec, then switch to log space in one step;
# zero entries become -inf.
tree_log_probs = np.log(
    np.array(
        [[nuc2vec[site] for site in genome] for genome in genomes],
        dtype=float,
    )
)

# Strictly-lower-triangular times from the last tree of DS1_three_taxa; all
# remaining entries are log(0) = -inf.
# NOTE(review): 0.0776 looks rounded, which may explain why the printed values
# match BEAST only to about four decimals — TODO confirm against the BEAST log.
log_times0 = np.log(
    np.array([
        [0, 0, 0],
        [0.0776, 0, 0],
        [0.42608429738419473, 0.42608429738419473, 0],
    ])
)

tree = Tree(theta, log_times0, tree_log_probs, pop_size=0.1)

print(tree.log_like) # from BEAST: -5.560615294506409
print(tree.log_prior) # from BEAST: -1.2074763008267646

-5.560652885806353
-1.2076727878538556


# Test that BEAST and my code output the same prior and likelihoods

In [31]:
# NOTE: pickle.load can execute arbitrary code while loading; only point
# data_file at a file you trust.
with open(data_file, 'rb') as f:
    DS1 = pickle.load(f)

# Species names and their genomes, kept in the same (iteration) order so that
# index k refers to the same taxon in both lists.
species = list(DS1)
genomes = [DS1[name] for name in species]

n_species = len(species)

# Symbol -> indicator vector over (A, G, C, T); ambiguity codes mark every
# base they allow.
# From https://github.com/zcrabbit/vbpi-torch/blob/ff86cf0c47a5753f5cc5b4dfe0b6ed783ab22669/unrooted/phyloModel.py#L7-L11
nuc2vec = {
    'A': [1., 0., 0., 0.],
    'G': [0., 1., 0., 0.],
    'C': [0., 0., 1., 0.],
    'T': [0., 0., 0., 1.],
    '-': [1., 1., 1., 1.],
    '?': [1., 1., 1., 1.],
    'N': [1., 1., 1., 1.],
    'R': [1., 1., 0., 0.],
    'Y': [0., 0., 1., 1.],
    'S': [0., 1., 1., 0.],
    'W': [1., 0., 0., 1.],
    'K': [0., 1., 0., 1.],
    'M': [1., 0., 1., 0.],
    'B': [0., 1., 1., 1.],
    'D': [1., 1., 0., 1.],
    'H': [1., 0., 1., 1.],
    'V': [1., 1., 1., 0.],
    '.': [1., 1., 1., 1.],
    'U': [0., 0., 0., 1.],
}

# Encode every site of every genome, then take logs (zeros become -inf).
tree_log_probs = np.log(
    np.array(
        [[nuc2vec[site] for site in genome] for genome in genomes],
        dtype=float,
    )
)

# All-zero theta with one n_species x n_species slice per leading index.
theta = np.zeros((2, n_species, n_species))

In [32]:
# This is the final tree recorded by the DS1 BEAST run.
# The nested record is assembled with two tiny constructors so that every node
# fits on one or two lines; the resulting dict has exactly the same keys and
# values as a hand-written literal.
def _leaf(name, length, height):
    """Return a tip record holding its "name", "length" and "height"."""
    return {"name": name, "length": length, "height": height}


def _clade(length, height, first, second):
    """Return an internal-node record with two children.

    Pass ``length=None`` for the root, which carries no "length" key.
    """
    node = {"children": [first, second]}
    if length is not None:
        node["length"] = length
    node["height"] = height
    return node


dict0 = {
    "root": _clade(
        None, 0.03939456266757521,
        _clade(
            0.008778601967561174, 0.030615960700014037,
            _clade(
                0.01188857952562131, 0.018727381174392727,
                _clade(
                    0.009754302894767291, 0.008973078279625436,
                    _leaf("Gallus_gallus", 0.00897307827962543, 6.938893903907228E-18),
                    _leaf("Turdus_migratorius", 0.00897307827962543, 6.938893903907228E-18),
                ),
                _clade(
                    0.005959611882833744, 0.012767769291558984,
                    _clade(
                        0.007083909912285521, 0.005683859379273459,
                        _leaf("Trachemys_scripta", 0.005683859379273456, 0.0),
                        _leaf("Alligator_mississippiensis", 0.005683859379273456, 0.0),
                    ),
                    _clade(
                        0.004232575930154688, 0.008535193361404295,
                        _leaf("Sceloporus_undulatus", 0.008535193361404288, 6.938893903907228E-18),
                        _leaf("Heterodon_platyrhinos", 0.008535193361404288, 6.938893903907228E-18),
                    ),
                ),
            ),
            _clade(
                0.022527377587769258, 0.00808858311224478,
                _leaf("Oryctolagus_cuniculus", 0.00808858311224477, 6.938893903907228E-18),
                _clade(
                    0.0027422343495500795, 0.0053463487626947,
                    _leaf("Homo_sapiens", 0.005346348762694691, 6.938893903907228E-18),
                    _clade(
                        0.0019322004932683617, 0.003414148269426341,
                        _leaf("Rattus_norvegicus", 0.0034141482694263294, 1.3877787807814457E-17),
                        _leaf("Mus_musculus", 0.0034141482694263294, 1.3877787807814457E-17),
                    ),
                ),
            ),
        ),
        _clade(
            0.005941580813440074, 0.03345298185413514,
            _leaf("Latimeria_chalumnae", 0.03345298185413513, 6.938893903907228E-18),
            _clade(
                0.005288832310686581, 0.028164149543448556,
                _leaf("Xenopus_laevis", 0.02816414954344855, 6.938893903907228E-18),
                _clade(
                    0.0052003145872947555, 0.0229638349561538,
                    _clade(
                        0.001352754351484211, 0.02161108060466959,
                        _clade(
                            0.006194980236102144, 0.015416100368567446,
                            _leaf("Bufo_valliceps", 0.015416100368567439, 6.938893903907228E-18),
                            _leaf("Hyla_cinerea", 0.015416100368567439, 6.938893903907228E-18),
                        ),
                        _clade(
                            0.0041633525720210125, 0.017447728032648577,
                            _clade(
                                0.0053218537260140576, 0.012125874306634518,
                                _leaf("Eleutherodactylus_cuneatus", 0.012125874306634513, 6.938893903907228E-18),
                                _leaf("Nesomantis_thomasseti", 0.012125874306634513, 6.938893903907228E-18),
                            ),
                            _leaf("Gastrophryne_carolinensis", 0.01744772803264857, 6.938893903907228E-18),
                        ),
                    ),
                    _clade(
                        0.006072676434679455, 0.016891158521474346,
                        _clade(
                            0.0034742205996797434, 0.013416937921794603,
                            _leaf("Discoglossus_pictus", 0.013416937921794596, 6.938893903907228E-18),
                            _clade(
                                0.0031088856937008403, 0.010308052228093764,
                                _clade(
                                    6.947027706769738E-4, 0.009613349457416792,
                                    _leaf("Ichthyophis_bannanicus", 0.009613349457416782, 1.3877787807814457E-17),
                                    _clade(
                                        3.1586408445488483E-4, 0.009297485372961906,
                                        _leaf("Amphiuma_tridactylum", 0.009297485372961897, 6.938893903907228E-18),
                                        _clade(
                                            0.00574537425350174, 0.0035521111194601676,
                                            _leaf("Hypogeophis_rostratus", 0.003552111119460157, 1.3877787807814457E-17),
                                            _leaf("Grandisonia_alternans", 0.003552111119460157, 1.3877787807814457E-17),
                                        ),
                                    ),
                                ),
                                _clade(
                                    0.006434815003429699, 0.0038732372246640667,
                                    _leaf("Plethodon_yonhalossee", 0.0038732372246640563, 1.3877787807814457E-17),
                                    _leaf("Scaphiopus_holbrooki", 0.0038732372246640563, 1.3877787807814457E-17),
                                ),
                            ),
                        ),
                        _clade(
                            0.010588085128235909, 0.006303073393238434,
                            _clade(
                                0.0016164261155914763, 0.004686647277646956,
                                _leaf("Siren_intermedia", 0.004686647277646954, 0.0),
                                _leaf("Ambystoma_mexicanum", 0.004686647277646954, 0.0),
                            ),
                            _leaf("Typhlonectes_natans", 0.006303073393238431, 0.0),
                        ),
                    ),
                ),
            ),
        ),
    ),
}

In [33]:
def find_leaf_inds(node):
    """Collect the indices of all tips underneath ``node``.

    A record with a "name" key is treated as a tip and yields the position of
    that name in the module-level ``species`` list. Any other record is
    expected to have two entries under "children"; the result is the first
    child's indices followed by the second child's.
    """
    if "name" not in node:
        kids = node["children"]
        return find_leaf_inds(kids[0]) + find_leaf_inds(kids[1])
    return [species.index(node["name"])]

# Pairwise time matrix, one row/column per species. populate_times (below)
# writes only entries times[max(i,j), min(i,j)]; everything else stays 0.
times = np.zeros((n_species,n_species))

def populate_times(node):
    """Record, for the subtree at ``node``, the height at which tip pairs join.

    Records without a "children" key are skipped. Otherwise the node's height
    is taken as its first child's "height" plus that child's "length", and it
    is written into the module-level ``times`` array at
    ``[max(i, j), min(i, j)]`` for every tip index ``i`` under the first child
    and ``j`` under the second. Both children are then processed recursively.
    Returns None; the result lives in ``times``.
    """
    if "children" not in node:
        return

    kids = node['children']
    first_tips = find_leaf_inds(kids[0])
    second_tips = find_leaf_inds(kids[1])
    join_height = kids[0]["height"] + kids[0]["length"]

    for i, j in product(first_tips, second_tips):
        # Always address the entry with the larger index as the row.
        row, col = (i, j) if i > j else (j, i)
        times[row, col] = join_height

    populate_times(kids[0])
    populate_times(kids[1])

# Fill `times` from the BEAST tree, starting at the root record.
populate_times(dict0['root'])

# Sanity check: every pair of distinct species must now have a positive time.
# A species that is present in `species` but absent from the tree would leave
# zeros behind, and because warnings are globally ignored the later np.log
# would silently turn them into -inf instead of raising.
unfilled_pairs = [(species[i], species[j])
                  for i in range(n_species)
                  for j in range(i)
                  if not times[i, j] > 0]
assert not unfilled_pairs, f"no coalescent time recorded for pairs such as {unfilled_pairs[:5]}"

In [34]:
# Add +inf on and above the diagonal (np.triu keeps that part and zeroes the
# rest), so after the log only the strictly-lower entries hold finite times.
log_times = np.log(
    times + np.triu(np.inf * np.ones_like(theta[0]))
)

tree = Tree(theta, log_times, tree_log_probs, pop_size=pop_size)

print(tree.log_like) # from BEAST: -6973.035883311344
print(tree.log_prior) # from BEAST: -42.49120102995377
print(tree.log_q)

-6973.035883262459
-42.491201029953764
-140.17817280570364
