In [1]:
import dendropy
import random
import math 
from dendropy.simulate import treesim
import numpy as np

#this function generates Brownian motion for a given tree
def simulate_brownian(t, sigma, dimension):
    """Simulate Brownian motion along the branches of a tree.

    The tree is walked in preorder. The root is placed at the origin with a
    zero displacement; every other node draws a Gaussian displacement with
    mean 0 and standard deviation ``sigma * sqrt(edge length)`` and is placed
    at its parent's position plus that displacement.

    Parameters:
        t: tree exposing ``preorder_node_iter()``; its nodes expose
           ``parent_node`` and ``edge.length``.
        sigma: standard deviation of the Brownian motion per unit branch length.
        dimension: when equal to 2 a second coordinate is simulated as well;
            for any other value only the first coordinate is simulated.

    Returns:
        The same tree ``t``. Every node gains ``X`` and ``displacementx``
        (and ``Y`` and ``displacementy`` when ``dimension == 2``).
    """
    planar = dimension == 2
    for node in t.preorder_node_iter():
        parent = node.parent_node
        if parent is None:
            # the root starts the walk at the origin
            node.X = 0.0
            node.displacementx = 0.0
            if planar:
                node.Y = 0.0
                node.displacementy = 0.0
            continue

        # standard deviation of the step accumulated along this branch
        step_sd = sigma*math.sqrt(node.edge.length)
        node.displacementx = random.gauss(0, step_sd)
        node.X = parent.X+node.displacementx
        if planar:
            node.displacementy = random.gauss(0, step_sd)
            node.Y = parent.Y+node.displacementy
    return t


#debug code below

def generatebd(br, dr, mt):
    """Simulate a birth-death tree and give every node its own taxon.

    Parameters:
        br: birth rate passed to ``treesim.birth_death_tree``.
        dr: death rate passed to ``treesim.birth_death_tree``.
        mt: value passed as ``max_time`` to the simulation.

    Returns:
        The simulated tree (extinct tips retained), with nodes labelled
        "T1", "T2", ... in preorder and annotated by ``cal_times``.
    """
    t = treesim.birth_death_tree(
        birth_rate=br,
        death_rate=dr,
        max_time=mt,
        is_retain_extinct_tips=True,
        is_add_extinct_attr=True,
    )

    # one label per node (internal nodes included), numbered in preorder from 1
    labels = ["T"+str(position) for position, _ in enumerate(t.preorder_node_iter(), start=1)]
    t.taxon_namespace = dendropy.TaxonNamespace(labels)

    # second preorder pass: attach the matching taxon to each node
    for position, node in enumerate(t.preorder_node_iter(), start=1):
        node.taxon = t.taxon_namespace.get_taxon("T"+str(position))

    # distance to root
    return cal_times(t)
    
def cal_times(t):
    """Annotate every node with ``time``, its distance from the root.

    The root gets ``time = 0``; every other node gets its parent's ``time``
    plus the length of the edge leading to it. Nodes are visited in preorder,
    so a parent is always annotated before its children.

    Returns the same tree ``t``.
    """
    for node in t.preorder_node_iter():
        parent = node.parent_node
        node.time = 0 if parent is None else parent.time+node.edge.length
    return t
    
# Debug run: one birth-death tree (birth rate 1, death rate 0.8, max_time 3.0)
# with a two-dimensional Brownian walk (sigma 1) simulated on it.
tree = simulate_brownian(generatebd(1, 0.8, 3.0), 1, 2)

#tree.encode_bipartitions()
#tree.print_plot_bipartitions()
print(tree.as_ascii_plot(plot_metric='length', show_internal_node_labels=True))

# for edge in tree.preorder_edge_iter():
#     print("%s" % edge.length)


# for node in tree.preorder_node_iter():
#     print("%s : %s: %s" % (node.taxon.label, node.time, node.distance_from_root()))
# print("\n")
# for edge in tree.preorder_edge_iter():
#     if edge.head_node is not None and edge.tail_node is not None:
#         print("%s : %s : %s" % (edge.tail_node.taxon.label, edge.head_node.taxon.label, edge.length))


# HKY85 character simulation along the debug tree; sequences are not kept on the tree
d = dendropy.model.discrete.hky85_chars(
    kappa=3,
    mutation_rate=0.1,
    seq_len=1000,
    tree_model=tree,
    retain_sequences_on_tree=False,
)
# for node in d:
#     print("%s : %s" % (node.label, d[node]))


# for node in tree.preorder_node_iter():    
#     print("%s : %s : %s" % (node.taxon.label, node.X, node.Y))

# print("\n is tip extinct?:")
# for leaf in tree.leaf_node_iter():    
#     print("%s : %s" % (leaf.taxon.label, hasattr(leaf, 'is_extinct')))
#     if hasattr(leaf, 'is_extinct'):
#         leaf.extinct_ancestor = True
#     else:
#         leaf.extinct_ancestor = False
        
# Flag every node that is an extinct tip or has an extinct tip below it.
# Tips must be seeded first: with the tip initialisation commented out, every
# tip defaulted to False and so did every ancestor. Postorder guarantees that
# children are flagged before their parent.
for node in tree.postorder_node_iter():
    children = list(node.child_node_iter())
    if children:
        node.extinct_ancestor = any(child.extinct_ancestor for child in children)
    else:
        # read the flag's value, not its mere presence, so a tip carrying
        # is_extinct = False is not mistaken for an extinct one
        node.extinct_ancestor = bool(getattr(node, 'is_extinct', False))

print("\n is tip ancestor of an extinct node?:")
for node in tree.postorder_node_iter():
    print("%s : %s" % (node.taxon.label, node.extinct_ancestor))


                          /-------------- T4                                                                          
               /----------T3                                                                                          
               |          \- T5                                                                                       
               |                                                                                                      
               |                                       /- T8                                                          
               |                     /-----------------T7                                                             
               |                     |                 |                   /- T10                                     
               |                     |                 \-------------------T9                                         
            /--T2                    |          

In [2]:
#drawing migration diagram

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# coordinates of every node (internal nodes included), in preorder
xcoords = [node.X for node in tree.preorder_node_iter()]
ycoords = [node.Y for node in tree.preorder_node_iter()]

# save the tree and the simulated sequences next to the notebook
tree.write(path="output.tre", schema="newick")
d.write(path="fastaoutput.tre", schema="fasta")

# one point per node, one arrow per branch (from the parent's position, along the displacement)
plt.scatter(xcoords, ycoords)
for node in (n for n in tree.preorder_node_iter() if n.parent_node is not None):
    plt.arrow(node.parent_node.X, node.parent_node.Y, node.displacementx, node.displacementy)
plt.show()


<Figure size 640x480 with 1 Axes>

In [3]:
#writing the BEAST xml file, depending on the dimension
#t is the tree, d is the DNA character matrix (currently not needed), i is the index of the file
def write_BEAST_xml(t, d, i, dimension):
    """Write the BEAST xml file number ``i`` for tree ``t``.

    Dispatches to ``write_BEAST_xml_dim_2`` when ``dimension`` equals 2 and to
    ``write_BEAST_xml_dim_1`` for every other value; ``t``, ``d`` and ``i`` are
    passed through unchanged.
    """
    writer = write_BEAST_xml_dim_2 if dimension==2 else write_BEAST_xml_dim_1
    writer(t, d, i)

In [4]:
def write_BEAST_xml_dim_2(t, d, i):
    """Write ``output8/beast<i>.xml``: a BEAST 1.10.4 analysis with two traits, X and Y.

    Parameters:
        t: tree exposing ``find_node_for_taxon(taxon)`` and
           ``as_string(schema=..., suppress_rooting=...)``; the nodes found must
           carry ``time``, ``X`` and ``Y``.
        d: iterable of taxa (objects with a ``label``). It is only iterated to
           list the taxa; no sequence data is written.
        i: index of the run, used in the xml file name and in the log file
           names referenced inside the xml.

    The ``output8`` directory must already exist. Nothing is returned.
    """
    # the context manager closes the file even when a lookup or a write raises
    with open("output8/beast"+str(i)+".xml","w") as file:
        file.write('<?xml version="1.0" standalone="yes"?>\n')
        file.write('<beast version="1.10.4">\n')
        file.write('\t<taxa id="taxa">\n')
        for tax in d:
            # look the node up once per taxon instead of once per written value
            node = t.find_node_for_taxon(tax)
            file.write('\t\t<taxon id="'+tax.label+'">\n')
            file.write('\t\t\t<date value="'+str(node.time)+'" direction="forwards" units="years"/>\n')
            # The X/Y attribute pair has always been written twice per taxon
            # (the second copy was marked "perhaps not needed?"); the output is
            # kept identical here.
            for _ in range(2):
                file.write('\t\t\t<attr name="X">\n')
                file.write('\t\t\t\t'+str(node.X)+'\n')
                file.write('\t\t\t</attr>\n')
                file.write('\t\t\t<attr name="Y">\n')
                file.write('\t\t\t\t'+str(node.Y)+'\n')
                file.write('\t\t\t</attr>\n')
            file.write('\t\t</taxon>\n')
        file.write('\t</taxa>\n')

        # the simulated tree is handed to BEAST as the starting tree
        file.write('\t<newick id="startingTree">\n')
        file.write('\t\t'+t.as_string(schema="newick",suppress_rooting=True)+'\n')
        file.write('\t</newick>\n')

        file.write("""	<treeModel id="treeModel">
		<coalescentTree idref="startingTree"/>
		<rootHeight>
			<parameter id="treeModel.rootHeight"/>
		</rootHeight>
		<nodeHeights internalNodes="true">
			<parameter id="treeModel.internalNodeHeights"/>
		</nodeHeights>
		<nodeHeights internalNodes="true" rootNode="true">
			<parameter id="treeModel.allInternalNodeHeights"/>
		</nodeHeights>
	</treeModel>\n""")

        # the run index i is spliced into the three output file names below
        file.write("""	<!-- Statistic for sum of the branch lengths of the tree (tree length)       -->
	<treeLengthStatistic id="treeLength">
		<treeModel idref="treeModel"/>
	</treeLengthStatistic>

	<!-- Statistic for time of most recent common ancestor of tree               -->
	<tmrcaStatistic id="age(root)" absolute="true">
		<treeModel idref="treeModel"/>
	</tmrcaStatistic>

<!-- START Multivariate diffusion model                                      -->

	<multivariateDiffusionModel id="X.diffusionModel">
		<precisionMatrix>
			<matrixParameter id="X.precision">
				<parameter id="X.precision.col1" value="0.05"/>
			</matrixParameter>
		</precisionMatrix>
	</multivariateDiffusionModel>

	<multivariateWishartPrior id="X.precisionPrior" df="1">
		<scaleMatrix>
			<matrixParameter>
				<parameter value="1.0"/>
			</matrixParameter>
		</scaleMatrix>
		<data>
			<parameter idref="X.precision"/>
		</data>
	</multivariateWishartPrior>

	<multivariateDiffusionModel id="Y.diffusionModel">
		<precisionMatrix>
			<matrixParameter id="Y.precision">
				<parameter id="Y.precision.col1" value="0.05"/>
			</matrixParameter>
		</precisionMatrix>
	</multivariateDiffusionModel>

	<multivariateWishartPrior id="Y.precisionPrior" df="1">
		<scaleMatrix>
			<matrixParameter>
				<parameter value="1.0"/>
			</matrixParameter>
		</scaleMatrix>
		<data>
			<parameter idref="Y.precision"/>
		</data>
	</multivariateWishartPrior>

	<!-- END Multivariate diffusion model                                        -->

	

	<!-- START Multivariate diffusion model                                      -->

	<multivariateTraitLikelihood id="X.traitLikelihood" traitName="X" useTreeLength="true" scaleByTime="true" reportAsMultivariate="true" reciprocalRates="true" integrateInternalTraits="true">
		<multivariateDiffusionModel idref="X.diffusionModel"/>
		<treeModel idref="treeModel"/>
		<traitParameter>
			<parameter id="leaf.X"/>
		</traitParameter>
		<conjugateRootPrior>
			<meanParameter>
				<parameter value="0.0"/>
			</meanParameter>
			<priorSampleSize>
				<parameter value="0.000001"/>
			</priorSampleSize>
		</conjugateRootPrior>
	</multivariateTraitLikelihood>
	<matrixInverse id="X.varCovar">
		<matrixParameter idref="X.precision"/>
	</matrixInverse>
	<continuousDiffusionStatistic id="X.diffusionRate">
		<multivariateTraitLikelihood idref="X.traitLikelihood"/>
	</continuousDiffusionStatistic>


	<multivariateTraitLikelihood id="Y.traitLikelihood" traitName="Y" useTreeLength="true" scaleByTime="true" reportAsMultivariate="true" reciprocalRates="true" integrateInternalTraits="true">
		<multivariateDiffusionModel idref="Y.diffusionModel"/>
		<treeModel idref="treeModel"/>
		<traitParameter>
			<parameter id="leaf.Y"/>
		</traitParameter>
		<conjugateRootPrior>
			<meanParameter>
				<parameter value="0.0"/>
			</meanParameter>
			<priorSampleSize>
				<parameter value="0.000001"/>
			</priorSampleSize>
		</conjugateRootPrior>
	</multivariateTraitLikelihood>
	<matrixInverse id="Y.varCovar">
		<matrixParameter idref="Y.precision"/>
	</matrixInverse>
	<continuousDiffusionStatistic id="Y.diffusionRate">
		<multivariateTraitLikelihood idref="Y.traitLikelihood"/>
	</continuousDiffusionStatistic>

	<!-- END Multivariate diffusion model                                        -->

	<!-- Define operators                                                        -->
	<operators id="operators" optimizationSchedule="log">

		<!-- START Multivariate diffusion model                                      -->
		<precisionGibbsOperator weight="1">
			<multivariateTraitLikelihood idref="X.traitLikelihood"/>
			<multivariateWishartPrior idref="X.precisionPrior"/>
		</precisionGibbsOperator>
		<precisionGibbsOperator weight="1">
			<multivariateTraitLikelihood idref="Y.traitLikelihood"/>
			<multivariateWishartPrior idref="Y.precisionPrior"/>
		</precisionGibbsOperator>

		<!-- END Multivariate diffusion model                                        -->

	</operators>
	

	<!-- Define MCMC                                                             -->
	<mcmc id="mcmc" chainLength="50000" autoOptimize="true" operatorAnalysis=""" +'"beastfiles\\beast'+str(i)+'.ops.txt"'+""">
		<joint id="joint">
			<prior id="prior">
				

				<!-- START Multivariate diffusion model                                      -->
				<multivariateWishartPrior idref="X.precisionPrior"/>
				<multivariateWishartPrior idref="Y.precisionPrior"/>

				<!-- END Multivariate diffusion model                                        -->

			</prior>
			<likelihood id="likelihood">
				

				<!-- START Multivariate diffusion model                                      -->
				<multivariateTraitLikelihood idref="X.traitLikelihood"/>
				<multivariateTraitLikelihood idref="Y.traitLikelihood"/>

				<!-- END Multivariate diffusion model                                        -->

			</likelihood>
		</joint>
		<operators idref="operators"/>

		<!-- write log to screen                                                     -->
		<log id="screenLog" logEvery="50">
			<column label="Joint" dp="4" width="12">
				<joint idref="joint"/>
			</column>
			<column label="Prior" dp="4" width="12">
				<prior idref="prior"/>
			</column>
			<column label="Likelihood" dp="4" width="12">
				<likelihood idref="likelihood"/>
			</column>
			<column label="age(root)" sf="6" width="12">
				<tmrcaStatistic idref="age(root)"/>
			</column>
			
		</log>

		<!-- write log to file                                                       -->
		<log id="fileLog" logEvery="50" fileName="""+'"beastfiles\\beast'+str(i)+'.log.txt"'+""" overwrite="false">
			<joint idref="joint"/>
			<prior idref="prior"/>
			<likelihood idref="likelihood"/>
			<parameter idref="treeModel.rootHeight"/>
			<tmrcaStatistic idref="age(root)"/>
			<treeLengthStatistic idref="treeLength"/>
			

			<!-- START Multivariate diffusion model                                      -->
			<matrixParameter idref="X.precision"/>
			<matrixInverse idref="X.varCovar"/>
			<continuousDiffusionStatistic idref="X.diffusionRate"/>
			<matrixParameter idref="Y.precision"/>
			<matrixInverse idref="Y.varCovar"/>
			<continuousDiffusionStatistic idref="Y.diffusionRate"/>

			<!-- END Multivariate diffusion model                                        -->

			<!-- START Multivariate diffusion model                                      -->
			<multivariateTraitLikelihood idref="X.traitLikelihood"/>
			<multivariateTraitLikelihood idref="Y.traitLikelihood"/>

			<!-- END Multivariate diffusion model                                        -->

			
			
		</log>

		<!-- write tree log to file                                                  -->
		<logTree id="treeFileLog" logEvery="50" nexusFormat="true" fileName="""+'"beastfiles\\beast'+str(i)+'.trees.txt"'+""" sortTranslationTable="true">
			<treeModel idref="treeModel"/>
			
			<joint idref="joint"/>

			<!-- START Ancestral state reconstruction                                    -->
			<trait name="X" tag="X">
				<multivariateTraitLikelihood idref="X.traitLikelihood"/>
			</trait>
			<trait name="Y" tag="Y">
				<multivariateTraitLikelihood idref="Y.traitLikelihood"/>
			</trait>

			<!-- END Ancestral state reconstruction                                      -->


			<!-- START Multivariate diffusion model                                      -->
			<multivariateDiffusionModel idref="X.diffusionModel"/>
			<multivariateTraitLikelihood idref="X.traitLikelihood"/>
			<multivariateDiffusionModel idref="Y.diffusionModel"/>
			<multivariateTraitLikelihood idref="Y.traitLikelihood"/>

			<!-- END Multivariate diffusion model                                        -->

		</logTree>
	</mcmc>
	
	<report>
		<property name="timer">
			<mcmc idref="mcmc"/>
		</property>
	</report>\n""")

        file.write('</beast>\n')

In [5]:
def write_BEAST_xml_dim_1(t, d, i):
    """Write ``output8/beast<i>.xml``: a BEAST 1.10.4 analysis with a single trait, X.

    Parameters:
        t: tree exposing ``find_node_for_taxon(taxon)`` and
           ``as_string(schema=..., suppress_rooting=...)``; the nodes found must
           carry ``time`` and ``X``.
        d: iterable of taxa (objects with a ``label``). It is only iterated to
           list the taxa; no sequence data is written.
        i: index of the run, used in the xml file name and in the log file
           names referenced inside the xml.

    The ``output8`` directory must already exist. Nothing is returned.
    """
    # the context manager closes the file even when a lookup or a write raises
    with open("output8/beast"+str(i)+".xml","w") as file:
        file.write('<?xml version="1.0" standalone="yes"?>\n')
        file.write('<beast version="1.10.4">\n')
        file.write('\t<taxa id="taxa">\n')
        for tax in d:
            # look the node up once per taxon instead of once per written value
            node = t.find_node_for_taxon(tax)
            file.write('\t\t<taxon id="'+tax.label+'">\n')
            file.write('\t\t\t<date value="'+str(node.time)+'" direction="forwards" units="years"/>\n')
            # The X attribute has always been written twice per taxon (the
            # second copy was marked "perhaps not needed?"); the output is kept
            # identical here.
            for _ in range(2):
                file.write('\t\t\t<attr name="X">\n')
                file.write('\t\t\t\t'+str(node.X)+'\n')
                file.write('\t\t\t</attr>\n')
            file.write('\t\t</taxon>\n')
        file.write('\t</taxa>\n')

        # the simulated tree is handed to BEAST as the starting tree
        file.write('\t<newick id="startingTree">\n')
        file.write('\t\t'+t.as_string(schema="newick",suppress_rooting=True)+'\n')
        file.write('\t</newick>\n')

        file.write("""	<treeModel id="treeModel">
		<coalescentTree idref="startingTree"/>
		<rootHeight>
			<parameter id="treeModel.rootHeight"/>
		</rootHeight>
		<nodeHeights internalNodes="true">
			<parameter id="treeModel.internalNodeHeights"/>
		</nodeHeights>
		<nodeHeights internalNodes="true" rootNode="true">
			<parameter id="treeModel.allInternalNodeHeights"/>
		</nodeHeights>
	</treeModel>\n""")

        # the run index i is spliced into the three output file names below
        file.write("""	<!-- Statistic for sum of the branch lengths of the tree (tree length)       -->
	<treeLengthStatistic id="treeLength">
		<treeModel idref="treeModel"/>
	</treeLengthStatistic>

	<!-- Statistic for time of most recent common ancestor of tree               -->
	<tmrcaStatistic id="age(root)" absolute="true">
		<treeModel idref="treeModel"/>
	</tmrcaStatistic>

<!-- START Multivariate diffusion model                                      -->

	<multivariateDiffusionModel id="X.diffusionModel">
		<precisionMatrix>
			<matrixParameter id="X.precision">
				<parameter id="X.precision.col1" value="0.05"/>
			</matrixParameter>
		</precisionMatrix>
	</multivariateDiffusionModel>

	<multivariateWishartPrior id="X.precisionPrior" df="1">
		<scaleMatrix>
			<matrixParameter>
				<parameter value="1.0"/>
			</matrixParameter>
		</scaleMatrix>
		<data>
			<parameter idref="X.precision"/>
		</data>
	</multivariateWishartPrior>

	<!-- END Multivariate diffusion model                                        -->

	

	<!-- START Multivariate diffusion model                                      -->

	<multivariateTraitLikelihood id="X.traitLikelihood" traitName="X" useTreeLength="true" scaleByTime="true" reportAsMultivariate="true" reciprocalRates="true" integrateInternalTraits="true">
		<multivariateDiffusionModel idref="X.diffusionModel"/>
		<treeModel idref="treeModel"/>
		<traitParameter>
			<parameter id="leaf.X"/>
		</traitParameter>
		<conjugateRootPrior>
			<meanParameter>
				<parameter value="0.0"/>
			</meanParameter>
			<priorSampleSize>
				<parameter value="0.000001"/>
			</priorSampleSize>
		</conjugateRootPrior>
	</multivariateTraitLikelihood>
	<matrixInverse id="X.varCovar">
		<matrixParameter idref="X.precision"/>
	</matrixInverse>
	<continuousDiffusionStatistic id="X.diffusionRate">
		<multivariateTraitLikelihood idref="X.traitLikelihood"/>
	</continuousDiffusionStatistic>


	<!-- END Multivariate diffusion model                                        -->


	<!-- Define operators                                                        -->
	<operators id="operators" optimizationSchedule="log">

		<!-- START Multivariate diffusion model                                      -->
		<precisionGibbsOperator weight="1">
			<multivariateTraitLikelihood idref="X.traitLikelihood"/>
			<multivariateWishartPrior idref="X.precisionPrior"/>
		</precisionGibbsOperator>

		<!-- END Multivariate diffusion model                                        -->

	</operators>
	

	<!-- Define MCMC                                                             -->
	<mcmc id="mcmc" chainLength="50000" autoOptimize="true" operatorAnalysis=""" +'"beastfiles\\beast'+str(i)+'.ops.txt"'+""">
		<joint id="joint">
			<prior id="prior">
				

				<!-- START Multivariate diffusion model                                      -->
				<multivariateWishartPrior idref="X.precisionPrior"/>

				<!-- END Multivariate diffusion model                                        -->

			</prior>
			<likelihood id="likelihood">
				

				<!-- START Multivariate diffusion model                                      -->
				<multivariateTraitLikelihood idref="X.traitLikelihood"/>

				<!-- END Multivariate diffusion model                                        -->

			</likelihood>
		</joint>
		<operators idref="operators"/>

		<!-- write log to screen                                                     -->
		<log id="screenLog" logEvery="50">
			<column label="Joint" dp="4" width="12">
				<joint idref="joint"/>
			</column>
			<column label="Prior" dp="4" width="12">
				<prior idref="prior"/>
			</column>
			<column label="Likelihood" dp="4" width="12">
				<likelihood idref="likelihood"/>
			</column>
			<column label="age(root)" sf="6" width="12">
				<tmrcaStatistic idref="age(root)"/>
			</column>
			
		</log>

		<!-- write log to file                                                       -->
		<log id="fileLog" logEvery="50" fileName="""+'"beastfiles\\beast'+str(i)+'.log.txt"'+""" overwrite="false">
			<joint idref="joint"/>
			<prior idref="prior"/>
			<likelihood idref="likelihood"/>
			<parameter idref="treeModel.rootHeight"/>
			<tmrcaStatistic idref="age(root)"/>
			<treeLengthStatistic idref="treeLength"/>
			

			<!-- START Multivariate diffusion model                                      -->
			<matrixParameter idref="X.precision"/>
			<matrixInverse idref="X.varCovar"/>
			<continuousDiffusionStatistic idref="X.diffusionRate"/>

			<!-- END Multivariate diffusion model                                        -->

			<!-- START Multivariate diffusion model                                      -->
			<multivariateTraitLikelihood idref="X.traitLikelihood"/>

			<!-- END Multivariate diffusion model                                        -->

			
			
		</log>

		<!-- write tree log to file                                                  -->
		<logTree id="treeFileLog" logEvery="50" nexusFormat="true" fileName="""+'"beastfiles\\beast'+str(i)+'.trees.txt"'+""" sortTranslationTable="true">
			<treeModel idref="treeModel"/>
			
			<joint idref="joint"/>

			<!-- START Ancestral state reconstruction                                    -->
			<trait name="X" tag="X">
				<multivariateTraitLikelihood idref="X.traitLikelihood"/>
			</trait>

			<!-- END Ancestral state reconstruction                                      -->


			<!-- START Multivariate diffusion model                                      -->
			<multivariateDiffusionModel idref="X.diffusionModel"/>
			<multivariateTraitLikelihood idref="X.traitLikelihood"/>

			<!-- END Multivariate diffusion model                                        -->

		</logTree>
	</mcmc>
	
	<report>
		<property name="timer">
			<mcmc idref="mcmc"/>
		</property>
	</report>\n""")

        file.write('</beast>\n')

In [6]:
def generate_tree(br, dr, num_extinct):
    """Simulate a birth-death tree and keep only the lineages leading to extinct tips.

    Parameters:
        br: birth rate passed to ``treesim.birth_death_tree``.
        dr: death rate passed to ``treesim.birth_death_tree``.
        num_extinct: value passed as ``num_extinct_tips`` to the simulation.

    Returns:
        The tree returned by ``prune_nodes``, annotated by ``cal_times``.
        Nodes are labelled "T1", "T2", ... in preorder before pruning.
    """
    t = treesim.birth_death_tree(
        birth_rate=br,
        death_rate=dr,
        num_extinct_tips=num_extinct,
        is_retain_extinct_tips=True,
        is_add_extinct_attr=True,
    )

    # one label per node (internal nodes included), numbered in preorder from 1
    labels = ["T"+str(position) for position, _ in enumerate(t.preorder_node_iter(), start=1)]
    t.taxon_namespace = dendropy.TaxonNamespace(labels)

    # second preorder pass: attach the matching taxon to each node
    for position, node in enumerate(t.preorder_node_iter(), start=1):
        node.taxon = t.taxon_namespace.get_taxon("T"+str(position))

    pruned = prune_nodes(t)

    # distance to root
    return cal_times(pruned)

#this function checks which of the nodes is an extinct leaf or an ancestor of an extinct leaf
#and only leaves these nodes in the tree
def prune_nodes(t):
    """Keep only extinct tips and their ancestors.

    Every node of ``t`` gets an ``extinct_ancestor`` flag: for a tip it is the
    truth value of its ``is_extinct`` attribute (False when the attribute is
    missing); for an internal node it is True when any child is flagged.
    The labels of all unflagged nodes are then handed to
    ``t.extract_tree_without_taxa_labels``.

    Parameters:
        t: tree exposing ``postorder_node_iter()`` and
           ``extract_tree_without_taxa_labels(labels=...)``; nodes expose
           ``child_node_iter()`` and ``taxon``.

    Returns:
        Whatever ``extract_tree_without_taxa_labels`` returns for those labels.
    """
    labels = set()
    # postorder: children are always flagged before their parent
    for node in t.postorder_node_iter():
        children = list(node.child_node_iter())
        if children:
            node.extinct_ancestor = any(child.extinct_ancestor for child in children)
        else:
            # Use the flag's value rather than its presence: a tip carrying
            # is_extinct = False must not be treated as extinct.
            node.extinct_ancestor = bool(getattr(node, 'is_extinct', False))
        # collect labels straight from the nodes (no per-taxon tree search)
        if not node.extinct_ancestor and node.taxon is not None:
            labels.add(node.taxon.label)
    return t.extract_tree_without_taxa_labels(labels=labels)


#an example of a function to run birth-death simulations and generate output
def run_bd_simulations(num_trees, dimension):
    """Run ``num_trees`` birth-death simulations and write every output file for each one.

    For each index ``i`` a tree is generated with ``generate_tree(1, 0.5, 20)``,
    a Brownian walk with sigma 1 is simulated on it in ``dimension``
    dimensions, and HKY85 sequences are simulated along it. The fasta, nexus
    and newick files and the BEAST xml are then written for that tree.

    Parameters:
        num_trees: number of simulations to run (0 writes nothing).
        dimension: 2 to simulate and annotate both X and Y, otherwise only X.

    The ``output1`` ... ``output4`` directories, and the one used by
    ``write_BEAST_xml``, must already exist. Nothing is returned.
    """
    for i in range(num_trees):
        t = generate_tree(1,0.5,20)
        t = simulate_brownian(t, 1, dimension)
        d = dendropy.model.discrete.hky85_chars(kappa=3, mutation_rate=0.01, seq_len=1000,tree_model=t, retain_sequences_on_tree=False)

        # expose the simulated values as node annotations so they can be written out
        for node in t.preorder_node_iter():
            node.annotations.add_bound_attribute("time")
            node.annotations.add_bound_attribute("X")
            if dimension==2:
                node.annotations.add_bound_attribute("Y")

        d.write(path="output1/fasta_output"+str(i)+".tre", schema="fasta")
        t.write(path="output2/phylogeny_output"+str(i)+".nex", schema="nexus", suppress_annotations=True)
        # These three writes sat outside the loop, so they only ever ran for
        # the last tree (and raised NameError when num_trees was 0).
        t.write(path="output3/newick_output"+str(i)+".tre", schema="newick", suppress_annotations=False)
        t.write(path="output4/nexus_output_"+str(i)+".tre", schema="nexus", suppress_internal_taxon_labels=True)
        write_BEAST_xml(t, d, i, dimension)
    
# example run: three birth-death simulations with a one-dimensional trait
num_trees, dimension = 3, 1

run_bd_simulations(num_trees=num_trees, dimension=dimension)



#     xy_dict = {}
#     dates_dict = {}
#     for tax in d:
#         xy_dict.update({tax.label : [t.find_node_for_taxon(tax).X, t.find_node_for_taxon(tax).Y]})
#         dates_dict.update({tax.label: [t.find_node_for_taxon(tax).time]})
        
#     xy_cts = dendropy.ContinuousCharacterMatrix.from_dict(xy_dict)
    
    
    #xy_cts.new_character_subset("aaaa", 3)
    
#     xy_cts.write(path="output5/positions_output"+str(i)+".xml", schema="nexus")
    
#     file1 = open("output6/dates_output"+str(i)+".txt","w") 
#     file2 = open("output7/position_output"+str(i)+".txt","w") 
#     file2.write("\tX\tY\n")
#     for tax in d:
#         file1.write(tax.label+"\t"+str(t.find_node_for_taxon(tax).time)+"\n")
#         file2.write(tax.label+"\t"+str(t.find_node_for_taxon(tax).X)+"\t"+str(t.find_node_for_taxon(tax).Y)+"\n")  
 
#     file1.close() 
#     file2.close()
    
    
#     if i==0:
#         print(dir(xy_cts))
        #compute continuous character data
   
#     x_dict = {}
#     y_dict = {}
#     for tax in d:
#         x_dict.update({tax.label : [t.find_node_for_taxon(tax).X]})
#         y_dict.update({tax.label : [t.find_node_for_taxon(tax).Y]})
    
#     x_cts = dendropy.ContinuousCharacterMatrix.from_dict(x_dict)
#     #x_cts.new_character_subset(label="x", character_indices = 0)
#     y_cts = dendropy.ContinuousCharacterMatrix.from_dict(y_dict)
#     y_cts.taxon_namespace = x_cts.taxon_namespace
#     #y_cts.new_character_subset(["y"], [0])
    
#     if i ==0:
#         print(x_dict)

#     print(x_cts.taxon_namespace)
#     print(y_cts.taxon_namespace)
    
#     positions = dendropy.ContinuousCharacterMatrix.concatenate([x_cts, y_cts])  




In [7]:
import random

#function to generate coalescent trees (this is the ultrametric case)
def generate_coalescent_tree(num_tips):
    """Generate a coalescent tree whose ``num_tips`` tips are all sampled at age 0.

    Lineages are merged two at a time, chosen uniformly at random; with ``k``
    lineages left, the waiting time to the next merge is exponential with
    rate ``k*(k-1)/2``. Tips are labelled "T0" ... "T<num_tips-1>" and the
    merge nodes continue the numbering; the last merge becomes the tree's
    seed node.

    Returns the tree after ``cal_times`` has annotated it.
    """
    taxon_namespace = dendropy.TaxonNamespace(["T"+str(k) for k in range(2*num_tips-1)])
    tree = dendropy.Tree(taxon_namespace=taxon_namespace)

    # all tips are sampled at the present
    lineages = []
    for k in range(num_tips):
        tip = dendropy.Node(taxon=taxon_namespace.get_taxon("T"+str(k)))
        tip.age = 0
        lineages.append(tip)

    elapsed = 0
    for merge in range(num_tips-1):
        remaining = len(lineages)
        elapsed = elapsed+random.expovariate(remaining*(remaining-1)/2)
        first, second = random.sample(range(remaining),2)

        ancestor_taxon = taxon_namespace.get_taxon("T"+str(merge+num_tips))
        if merge == num_tips-2:
            # the final merge is the root of the tree
            ancestor = tree.seed_node
            ancestor.taxon = ancestor_taxon
        else:
            ancestor = dendropy.Node(taxon=ancestor_taxon)
        ancestor.age = elapsed

        # branch length = age of the merge minus the age of the merging lineage
        for position in (first, second):
            lineages[position].edge.length = elapsed-lineages[position].age
        ancestor.set_child_nodes([lineages[first], lineages[second]])

        # drop the larger index first so the smaller one stays valid
        lineages.pop(max(first, second))
        lineages.pop(min(first, second))
        lineages.append(ancestor)

    return cal_times(tree)

# tree=generate_coalescent_tree()
# tree=cal_times(tree)

# ages = []
# for i in range(100):
#     t=generate_coalescent_tree()
#     t=cal_times(t)
#     for leaf in t.leaf_node_iter():
#         ages.append(leaf.time)
#         break
# # print(ages)
# print(sum(ages)/100)

In [8]:
import random
def generate_coalescent_nonultrametric_tree(lamb=1, period_length=4, num_tips_per_period=10, num_periods=6):
    """Generate a coalescent tree whose tips are sampled in batches at several ages.

    At the start of each period (age ``current_period*period_length``)
    ``num_tips_per_period`` new tips are added. Lineages then merge two at a
    time, chosen uniformly at random; with ``k`` lineages the waiting time is
    exponential with rate ``lamb*k*(k-1)/2``. In every period but the last,
    merging stops as soon as a waiting time crosses into the next period. In
    the last period merging continues until one lineage is left, which becomes
    the tree's seed node.

    Parameters (the defaults are the values that used to be hard-coded):
        lamb: multiplier of the pairwise coalescence rate; must be positive.
        period_length: time between two sampling batches; must not be negative.
        num_tips_per_period: number of tips added at the start of each period.
        num_periods: number of sampling periods.

    Returns:
        The tree after ``cal_times`` has annotated it. Nodes are labelled
        "T0", "T1", ... in order of creation and carry an ``age``.

    Raises:
        ValueError: if fewer than two tips would be sampled in total, or if
            ``lamb`` or ``period_length`` is out of range.
    """
    if num_periods < 1 or num_tips_per_period < 1 or num_tips_per_period*num_periods < 2:
        raise ValueError("at least two tips in total are needed to build a tree")
    if lamb <= 0:
        raise ValueError("lamb must be positive")
    if period_length < 0:
        raise ValueError("period_length must not be negative")

    num_tips = num_tips_per_period*num_periods
    # a binary tree on num_tips tips has 2*num_tips-1 nodes in total
    names = ["T"+str(k) for k in range(2*num_tips-1)]

    taxon_namespace = dendropy.TaxonNamespace(names)
    tree = dendropy.Tree(taxon_namespace=taxon_namespace)
    current_nodes = []
    index = 0
    last_period = num_periods-1

    for current_period in range(num_periods):
        # restart the clock at the beginning of the period and add the new batch of tips
        time_from_present = current_period*period_length
        for _ in range(num_tips_per_period):
            node = dendropy.Node(taxon=taxon_namespace.get_taxon("T"+str(index)))
            current_nodes.append(node)
            index = index+1
            node.age = time_from_present

        current_num_tips = len(current_nodes)

        for merges in range(current_num_tips-1):
            remaining = len(current_nodes)
            time_to_coalescent = random.expovariate(lamb*remaining*(remaining-1)/2)
            time_from_present = time_from_present+time_to_coalescent
            # the merge would fall into the next period: stop and move on
            if current_period < last_period and time_from_present > (current_period+1)*period_length:
                break

            merging_branches = random.sample(range(remaining),2)

            if merges == current_num_tips-2 and current_period == last_period:
                # the very last merge is the root of the tree
                node = tree.seed_node
                node.taxon = taxon_namespace.get_taxon("T"+str(index))
            else:
                node = dendropy.Node(taxon=taxon_namespace.get_taxon("T"+str(index)))
            index = index+1

            node.age = time_from_present
            # branch length = age of the merge minus the age of the merging lineage
            for position in merging_branches:
                current_nodes[position].edge.length = time_from_present-current_nodes[position].age
            node.set_child_nodes([current_nodes[merging_branches[0]], current_nodes[merging_branches[1]]])

            # drop the larger index first so the smaller one stays valid
            current_nodes.pop(max(merging_branches))
            current_nodes.pop(min(merging_branches))
            current_nodes.append(node)

    tree = cal_times(tree)
    return tree

In [9]:
# Simulate five non-ultrametric coalescent trees with a one-dimensional trait,
# write a BEAST xml file for each and run BEAST on it.
num_trees = 5
dimension = 1
import beastxmlwriter
import os

for i in range(num_trees):
    t = simulate_brownian(generate_coalescent_nonultrametric_tree(), 1, dimension)
    d = dendropy.model.discrete.hky85_chars(
        kappa=3,
        mutation_rate=0.01,
        seq_len=1000,
        tree_model=t,
        retain_sequences_on_tree=False,
    )
    beastxmlwriter.write_BEAST_xml(t, d, i, dimension)

    # run BEAST on the file just written (goes through cmd, fixed seed, overwriting earlier logs)
    beast_command = 'cmd /c java -jar beast.jar -overwrite -seed 1234 "output8\\beast'+str(i)+'.xml"'
    os.system(beast_command)