In [1]:
################################################################################
#
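# RevBayes script: relaxed-clock dating with a birth-death tree prior and node calibrations
# (the fossilized birth-death (FBD) setup is kept in the file but commented out)
#
# This file: builds the model, runs the MCMC and summarizes the sampled trees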
#
# authors: Barberis Tommaso, Verneret Marie, Chareyre Marie, Frouté Timothée
#
################################################################################

#######################
# Reading in the Data #
#######################
# Fix the random-number seed so that the initial draws and the MCMC run can be
# reproduced after a kernel restart (change the value for an independent run).
seed(12345)

# The separate taxon file (all fossils and extant species) is not read any more #
# taxa <- readTaxonData("../Projet/fishes/taxa.tsv")

# Read the protein (amino-acid) alignment
data <- readDiscreteCharacterData("../Projet/fishes/aln_prot.fst")

# The taxon list and the number of species are taken from the alignment itself
# n_taxa <- taxa.size()
taxa <- data.taxa()
n_species <- data.ntaxa()
# A rooted binary tree with n tips has 2n - 2 branches
n_branches <- 2 * n_species - 2

# Vector that collects every MCMC move appended in the cells below
moves = VectorMoves()

   Missing Variable:	Variable os does not exist

   Successfully read one character matrix from file '../Projet/fishes/aln_prot.fst'


In [2]:
##########################################################################################
# Birth-death parameters for the tree prior (the FBD-specific parts are commented out) #
##########################################################################################

# Speciation and extinction rates are fixed constants here: no prior is placed on #
# them and their moves are commented out, so they are not estimated #
# speciation rate
lambda <- 1.000001
# moves.append(mvScale(lambda, weight=1))

# extinction rate
mu <- 1.0
# moves.append(mvScale(mu, weight=1))

# sampling fraction (in the present), also a fixed constant
rho <- 0.00001

# Deterministic nodes for the diversification and turnover rates (disabled) #
# diversification := lambda - mu
# turnover := mu/lambda

# Exponential prior on the rate of sampling fossils (psi) -- disabled, only used by the FBD prior #
# psi ~ dnExponential(10) 

# Scale move on the psi parameter -- disabled, only used by the FBD prior # 
# moves.append( mvScale(psi, weight=1.0) )

# The starting age of the process is stored in origin_time #
# (it is passed to the tree prior as rootAge in the next cell) #
# Specify a uniform prior between 300 and 750 on that age #
origin_time ~ dnUniform(300,750)

# Specify a single scale move on the origin_time parameter #
# (one move only, with tuning parameter lambda=0.1; not a set of sliding-window moves) # 
moves.append(mvScale(origin_time, weight=1.0, lambda=0.1))

In [3]:
### Define the tree-prior distribution ###
# The fossilized birth-death prior is kept for reference but is disabled:
# timetree ~ dnFBDP(origin=origin_time, lambda=lambda, mu=mu, psi=psi, rho=rho, taxa=taxa)
# Active prior: a birth-death process whose root age is origin_time, using the
# "uniform" sampling strategy with fraction rho and conditioned on "nTaxa".
timetree ~ dnBDP(rootAge=origin_time, lambda=lambda, mu=mu, rho=rho, taxa=taxa, samplingStrategy="uniform", condition="nTaxa")

In [4]:
# Display the current value of the timetree node
timetree

  
   (((Scyliorhinus_canicula[&index=23]:464.504533,(Chimaera_sp[&index=22]:319.794217,Heterodontus_francisci[&index=21]:319.794217)[&index=24]:144.710316)[&index=25]:219.724862,(((Rhinochimaera_pacifica[&index=20]:119.275739,Callorhinchus_capensis[&index=19]:119.275739)[&index=26]:446.137114,Callorhinchus_milii[&index=18]:565.412854)[&index=27]:10.796649,Amblyraja_radiata[&index=17]:576.209502)[&index=28]:108.019893)[&index=29]:27.081322,(((Hydrolagus_lemures[&index=16]:658.594272,Tetraodon_nigroviridis[&index=15]:658.594272)[&index=30]:33.696712,(((Plesiobatis_daviesi[&index=14]:125.272432,Cyprinus_carpio[&index=13]:125.272432)[&index=31]:208.861924,Homo_sapiens[&index=12]:334.134356)[&index=32]:338.512106,(((Squalus_acanthias[&index=11]:50.363902,Okamejei_kenojei[&index=10]:50.363902)[&index=33]:425.674184,Xenopus_tropicalis[&index=9]:476.038085)[&index=34]:143.408996,((Protopterus_aethiopicus[&index=8]:73.346300,Chimaera_monstrosa[&index=7]:73.346300)[&index=35]:521.217187,((Stru

In [5]:

# Specify moves on the tree and node times #
# The topology moves below are all commented out, so none of the moves added
# in this cell proposes a new tree topology.
# moves.append( mvFNPR(timetree, weight=15.0) )
# moves.append(mvNarrow(timetree, weight=15.0))
# moves.append( mvCollapseExpandFossilBranch(timetree, origin_time, weight=6.0) )

# These moves update the node ages #
# NOTE(review): timetree is built with rootAge=origin_time, and the root-time
# slide below is also given origin_time -- confirm this combination is intended.
moves.append( mvNodeTimeSlideUniform(timetree, weight=20.0) )
moves.append( mvRootTimeSlideUniform(timetree, origin_time, weight=4.0) )
moves.append(mvSubtreeScale(timetree, weight=4))

In [6]:
# fossils = timetree.getFossils()

In [7]:
# for(i in 1:fossils.size())
# {
#     t[i] := tmrca(timetree, clade(fossils[i]))

#     a_i = fossils[i].getMinAge()
#     b_i = fossils[i].getMaxAge()

#     F[i] ~ dnUniform(t[i] - b_i, t[i] - a_i)
#     F[i].clamp( 0 )
# }

In [8]:
# Read the starting tree from file as a clock (time) tree;
# readTrees returns a vector of trees, of which only the first is kept
tree <- readTrees("../Projet/fishes/vertebrate_Poisson_NJ.tree", treetype="clock")[1]

   Attempting to read the contents of file "vertebrate_Poisson_NJ.tree"
   Successfully read file


In [9]:
# Display the tree as read from file (before any branch-length adjustment)
tree

  
   ((((Squalus_acanthias[&index=23]:0.040710,((Mustelus_manazo[&index=22]:0.031320,Scyliorhinus_canicula[&index=21]:0.043080)[&index=24]:0.010170,Heterodontus_francisci[&index=20]:0.042350)[&index=25]:0.004480)[&index=26]:0.017110,((Amblyraja_radiata[&index=19]:0.021970,Okamejei_kenojei[&index=18]:0.031320)[&index=27]:0.036820,Plesiobatis_daviesi[&index=17]:0.084620)[&index=28]:0.009850)[&index=29]:0.023300,((((Chimaera_monstrosa[&index=16]:0.016590,Chimaera_sp[&index=15]:0.017640)[&index=30]:0.012410,Hydrolagus_lemures[&index=14]:0.033440)[&index=31]:0.011530,(Rhinochimaera_pacifica[&index=13]:0.020300,Hariotta_raleighana[&index=12]:0.023250)[&index=32]:0.026530)[&index=33]:0.009060,(Callorhinchus_callorynchus[&index=11]:0.003400,(Callorhinchus_capensis[&index=10]:0.002350,Callorhinchus_milii[&index=9]:0.001360)[&index=34]:0.000780)[&index=35]:0.046730)[&index=36]:0.074110)[&index=37]:0.011030,(((Tetraodon_nigroviridis[&index=8]:0.064100,Polymixia_japonica[&index=7]:0.056080)[&ind

In [10]:
# Adjust terminal branch lengths so as to make the tree ultrametric
tree.makeUltrametric()

# Rescale all the nodes of the tree so that the root age becomes 750
# (750 is also the upper bound of the dnUniform(300,750) prior on origin_time)
tree.rescale(750/tree.rootAge())

In [11]:
# Display the tree after it has been made ultrametric and rescaled
tree

  
   ((((Squalus_acanthias[&index=23]:606.856634,((Mustelus_manazo[&index=22]:566.089715,Scyliorhinus_canicula[&index=21]:566.089715)[&index=24]:28.300312,Heterodontus_francisci[&index=20]:594.390027)[&index=25]:12.466607)[&index=26]:47.612422,((Amblyraja_radiata[&index=19]:524.599288,Okamejei_kenojei[&index=18]:524.599288)[&index=27]:102.459929,Plesiobatis_daviesi[&index=17]:627.059216)[&index=28]:27.409840)[&index=29]:64.837489,((((Chimaera_monstrosa[&index=16]:421.248887,Chimaera_sp[&index=15]:421.248887)[&index=30]:34.533615,Hydrolagus_lemures[&index=14]:455.782502)[&index=31]:32.084817,(Rhinochimaera_pacifica[&index=13]:414.041630,Hariotta_raleighana[&index=12]:414.041630)[&index=32]:73.825690)[&index=33]:25.211487,(Callorhinchus_callorynchus[&index=11]:383.042075,(Callorhinchus_capensis[&index=10]:380.871549,Callorhinchus_milii[&index=9]:380.871549)[&index=34]:2.170525)[&index=35]:130.036732)[&index=36]:206.227738)[&index=37]:30.693455,(((Tetraodon_nigroviridis[&index=8]:640.49

In [12]:
# Replace the current value of timetree with the tree read and rescaled above,
# so that the MCMC starts from it
timetree.setValue(tree)

In [13]:
# origin_time.setValue(500.0)

In [14]:
# ### nodes calibration
# clade_Gnathostomata <- clade("Cyprinus_carpio","Chimaera_monstrosa")
# minimum_bound_Gnathostomata <- 422
# # maximum_bound_Gnathostomata <- 463
# speciation_clade_Gnathostomata := tmrca(timetree, clade_Gnathostomata)


# # width_age_prior_Gnathostomata <- (maximum_bound_Gnathostomata-minimum_bound_Gnathostomata)/2.0
# # mean_age_prior_Gnathostomata <- minimum_bound_Gnathostomata + width_age_prior_Gnathostomata
# # obs_age_clade_Gnathostomata ~ dnNormal(speciation_clade_Gnathostomata, width_age_prior_Gnathostomata)
# obs_age_clade_Gnathostomata ~ dnExponential(0.2, offset = speciation_clade_Gnathostomata)
# obs_age_clade_Gnathostomata.clamp( minimum_bound_Gnathostomata )

# # clade_Euteleostomi <- clade("Cyprinus_carpio","Homo_sapiens")
# # minimum_bound_Euteleostomi <- 416
# # maximum_bound_Euteleostomi <- 422
# # speciation_clade_Euteleostomi := tmrca(timetree, clade_Euteleostomi)
# # width_age_prior_Euteleostomi <- (maximum_bound_Euteleostomi - minimum_bound_Euteleostomi)/2.0
# # mean_age_prior_Euteleostomi <- minimum_bound_Euteleostomi + width_age_prior_Euteleostomi
# # # obs_age_clade_Euteleostomi ~ dnNormal(speciation_clade_Euteleostomi, width_age_prior_Euteleostomi)
# # obs_age_clade_Euteleostomi ~ dnExponential(0.2, offset = -speciation_clade_Euteleostomi)
# # obs_age_clade_Euteleostomi.clamp( -minimum_bound_Euteleostomi )

# # clade_Chondrichthyes <- clade("Plesiobatis_daviesi","Chimaera_monstrosa")
# # minimum_bound_Chondrichthyes <- 410
# # speciation_clade_Chondrichthyes := tmrca(timetree, clade_Chondrichthyes)
# # obs_age_clade_Chondrichthyes ~ dnExponential(0.2, offset = -speciation_clade_Chondrichthyes)
# # obs_age_clade_Chondrichthyes.clamp(-minimum_bound_Chondrichthyes)

In [15]:
# Node calibrations
#
# Every calibration follows the same four steps:
#   1. clade_X             : the clade is defined by two of its member taxa
#   2. minimum_bound_X     : minimum age used for that clade (presumably in
#                            million years -- TODO confirm the unit and source)
#   3. speciation_clade_X  : deterministic node tracking the age of the clade's
#                            most recent common ancestor in timetree
#   4. obs_age_clade_X     : exponential(rate 0.2) variable with offset
#                            -speciation_clade_X, clamped to -minimum_bound_X
# Because the clamped value must not be smaller than the offset, the clade age
# cannot be younger than the minimum bound; the excess (age - minimum bound)
# is exponentially distributed with rate 0.2, i.e. a mean of 5.
#
# NOTE(review): the bound variables are named inconsistently
# (minimum_bound_clade_N for clades 1, 2, 6, 7, 8, 9 but minimum_bound_N for
# clades 3, 10, 11, 14, 15, 16, 19), and the clade numbering has gaps.

# clade 1 (called Gnathostomata in the commented-out draft cell above)
clade_1 <- clade("Cyprinus_carpio","Chimaera_monstrosa")
minimum_bound_clade_1 <- 422
speciation_clade_1 := tmrca(timetree, clade_1)
obs_age_clade_1 ~ dnExponential(0.2, offset= -speciation_clade_1)
obs_age_clade_1.clamp( -minimum_bound_clade_1 )

# clade 2 (called Euteleostomi in the commented-out draft cell above)
clade_2 <- clade("Cyprinus_carpio","Homo_sapiens")
minimum_bound_clade_2 <- 416
speciation_clade_2 := tmrca(timetree, clade_2)
obs_age_clade_2 ~ dnExponential(0.2, offset = -speciation_clade_2)
obs_age_clade_2.clamp( -minimum_bound_clade_2 )

# clade 3
clade_3 <- clade("Cyprinus_carpio","Tetraodon_nigroviridis")
minimum_bound_3 <- 150
speciation_clade_3 := tmrca(timetree, clade_3)
obs_age_clade_3 ~ dnExponential(0.2, offset = -speciation_clade_3)
obs_age_clade_3.clamp( -minimum_bound_3 )

# clade 6
clade_6 <- clade("Xenopus_tropicalis","Homo_sapiens")
minimum_bound_clade_6 <- 330
speciation_clade_6 := tmrca(timetree, clade_6)
obs_age_clade_6 ~ dnExponential(0.2, offset = -speciation_clade_6)
obs_age_clade_6.clamp( -minimum_bound_clade_6 )

# clade 7
clade_7 <- clade("Struthio_camelus","Homo_sapiens")
minimum_bound_clade_7 <- 312
speciation_clade_7 := tmrca(timetree, clade_7)
obs_age_clade_7 ~ dnExponential(0.2, offset = -speciation_clade_7)
obs_age_clade_7.clamp( -minimum_bound_clade_7 )

# clade 8
clade_8 <- clade("Ornithorhynchus_anatinus","Homo_sapiens")
minimum_bound_clade_8 <- 163
speciation_clade_8 := tmrca(timetree, clade_8)
obs_age_clade_8 ~ dnExponential(0.2, offset = -speciation_clade_8)
obs_age_clade_8.clamp( -minimum_bound_clade_8 )

# clade 9 (called Chondrichthyes in the commented-out draft cell above)
clade_9 <- clade("Plesiobatis_daviesi","Chimaera_monstrosa")
minimum_bound_clade_9 <- 410
speciation_clade_9 := tmrca(timetree, clade_9)
obs_age_clade_9 ~ dnExponential(0.2, offset = -speciation_clade_9)
obs_age_clade_9.clamp( -minimum_bound_clade_9 )

# clade 10
clade_10 <- clade("Plesiobatis_daviesi","Mustelus_manazo")
minimum_bound_10 <- 190
speciation_clade_10 := tmrca(timetree, clade_10)
obs_age_clade_10 ~ dnExponential(0.2, offset = -speciation_clade_10)
obs_age_clade_10.clamp( -minimum_bound_10 )

# clade 11
clade_11 <- clade("Plesiobatis_daviesi","Okamejei_kenojei")
minimum_bound_11 <- 176
speciation_clade_11 := tmrca(timetree, clade_11)
obs_age_clade_11 ~ dnExponential(0.2, offset = -speciation_clade_11)
obs_age_clade_11.clamp( -minimum_bound_11 )

# clade 14
clade_14 <- clade("Heterodontus_francisci","Mustelus_manazo")
minimum_bound_14 <- 176
speciation_clade_14 := tmrca(timetree, clade_14)
obs_age_clade_14 ~ dnExponential(0.2, offset = -speciation_clade_14)
obs_age_clade_14.clamp( -minimum_bound_14 )

# clade 15
clade_15 <- clade("Scyliorhinus_canicula","Mustelus_manazo")
minimum_bound_15 <- 165
speciation_clade_15 := tmrca(timetree, clade_15)
obs_age_clade_15 ~ dnExponential(0.2, offset = -speciation_clade_15)
obs_age_clade_15.clamp( -minimum_bound_15 )

# clade 16
clade_16 <- clade("Callorhinchus_callorynchus","Chimaera_monstrosa")
minimum_bound_16 <- 161
speciation_clade_16 := tmrca(timetree, clade_16)
obs_age_clade_16 ~ dnExponential(0.2, offset = -speciation_clade_16)
obs_age_clade_16.clamp( -minimum_bound_16 )

# clade 19
clade_19 <- clade("Rhinochimaera_pacifica","Chimaera_monstrosa")
minimum_bound_19 <- 84
speciation_clade_19 := tmrca(timetree, clade_19)
obs_age_clade_19 ~ dnExponential(0.2, offset = -speciation_clade_19)
obs_age_clade_19.clamp( -minimum_bound_19 )


In [16]:
# ### Create deterministic nodes to monitor various tree statistics ###
# # Monitor the number of sampled ancestors in the FBD-tree #
# num_samp_anc := timetree.numSampledAncestors();

# # Monitor the age of the clade containing all 23 sampled taxa #
# clade_extant = clade("Cyprinus_carpio", "Polymixia_japonica", "Tetraodon_nigroviridis", "Protopterus_aethiopicus", "Xenopus_tropicalis", "Struthio_camelus", "Ornithorhynchus_anatinus",
#    "Homo_sapiens", "Plesiobatis_daviesi", "Okamejei_kenojei", "Amblyraja_radiata", "Squalus_acanthias", "Heterodontus_francisci", "Scyliorhinus_canicula", "Mustelus_manazo",
#    "Callorhinchus_milii", "Callorhinchus_capensis", "Callorhinchus_callorynchus", "Hariotta_raleighana", "Rhinochimaera_pacifica", "Hydrolagus_lemures", "Chimaera_sp",
#    "Chimaera_monstrosa")
# age_extant := tmrca(timetree, clade_extant)

# # Monitor the age of a particular fossil #
# # age_Kretzoiarctos_beatrix := tmrca(timetree, clade("clade_9")) # divergence between holocephals and elasmobranchii

In [17]:
###########################################
# Substitution model and clock model      #
###########################################
# The data are amino-acid sequences: use the MtRev rate matrix #
Q := fnMtRev()

# Relaxed clock: every branch has its own rate, and all branch rates share one
# gamma distribution described by its mean and its relative variance.
# Both hyperparameters get an exponential(1) prior and a scale move.
mean_clockrate ~ dnExponential(1.0)
moves.append(mvScale(mean_clockrate, weight=1.0))

relvar_clockrate ~ dnExponential(1.0)
moves.append(mvScale(relvar_clockrate, weight=1.0))

# Shape (alpha) and rate (beta) of the gamma distribution:
# mean = alpha / beta = mean_clockrate, relative variance = 1 / alpha
alpha := 1.0 / relvar_clockrate
beta := alpha / mean_clockrate

# One rate per branch, each with its own scale move #
for (i in 1:n_branches) {
    clockrate[i] ~ dnGamma(alpha, beta)
    moves.append(mvScale(clockrate[i], weight=1.0, lambda=0.1))
}

In [18]:
### Create the substitution model on timetree (rate matrix Q, per-branch clock rates) ###
### and clamp it to the observed amino-acid alignment ###
seq ~ dnPhyloCTMC( tree=timetree, Q=Q, branchRates=clockrate,  type="AA")
seq.clamp( data )

In [19]:
# Display a summary of the alignment (number of taxa, number of characters, data type)
data

   
   Protein character matrix with 23 taxa and 3248 characters
   Origination:                   aln_prot.fst
   Number of taxa:                23
   Number of included taxa:       23
   Number of characters:          3248
   Number of included characters: 3248
   Datatype:                      Protein
   
   


In [20]:
########
# MCMC #
########

# Initialize the model object; model() is given one node of the graph (timetree) #
mymodel = model(timetree)

In [21]:
# monitors = VectorMonitors()

In [22]:
# Create a vector of monitors #
# Note: monitors is not created explicitly (the VectorMonitors() call in the
# previous cell is commented out); it is filled here by index assignment.
# 1. for the full model: written to a tab-separated log file every 10 generations #
monitors[1] = mnModel(filename="analyses/fishes_relaxedclock.log", printgen=10, separator = TAB)

In [23]:
# 2. the tree: timetree is written to a tab-separated file every 10 generations #
# (this file is read back later to summarize the sampled trees) #
monitors[2] = mnFile(timetree, filename="analyses/fishes_relaxedclock.trees", printgen=10, separator = TAB)

In [24]:
# 3. and a few select parameters (clock-rate mean and relative variance, #
# origin time) printed to the screen every 100 generations #
monitors[3] = mnScreen(printgen=100, mean_clockrate, relvar_clockrate, origin_time)

In [25]:
# Initialize the MCMC object from the model, the monitors and the moves #
mymcmc = mcmc(mymodel, monitors, moves)

# Run the MCMC for 500 generations #
# NOTE(review): no separate burn-in/tuning phase is run before sampling, and
# 500 generations is a very short chain -- confirm this is only a test run.
mymcmc.run(generations=500)

   
   Running MCMC simulation
   This simulation runs 1 independent replicate.
   The simulator uses 50 different moves in a random move schedule with 75 moves per iteration
   

Iter        |      Posterior   |     Likelihood   |          Prior   |   mean_clock..   |    origin_time   |   relvar_clo..   |    elapsed   |        ETA   |
-------------------------------------------------------------------------------------------------------------------------------------------------------------
0           |        -208763   |        -208577   |       -185.745   |      0.3984971   |            750   |      0.6158502   |   00:00:00   |   --:--:--   |
100         |        -153432   |        -153261   |       -171.034   |      0.3238303   |       424.9448   |       1.058013   |   00:00:35   |   --:--:--   |
200         |        -149224   |        -149054   |       -169.398   |      0.3341338   |       429.1069   |        1.15231   |   00:01:09   |   00:01:43   |
300         |        -146663  

In [26]:
# Read back the sampled trees, discarding the first 10% of the samples as burn-in
treetrace = readTreeTrace("analyses/fishes_relaxedclock.trees", treetype="clock", burnin=0.1)
# Summarize the trace as a maximum clade credibility (MCC) tree and write it to file.
# NOTE(review): the variable is called map_tree although mccTree() is used, not a MAP tree.
map_tree = mccTree(treetrace, "analyses/fishes_relaxedclock.tree")

   Processing file "/home/tommaso/M2/Semestre_3/Phylo/repo_git/analyses/fishes_relaxedclock.trees"

Progress:
0---------------25---------------50---------------75--------------100
********************************************************************

   Compiling maximum clade credibility tree from 46 trees.
   
   Summarizing clades ...
   

Progress:
0---------------25---------------50---------------75--------------100
********************************************************************

   Annotating tree ...
