# RevBayes script: A simple FBD analysis

##  Reading in the Data

In [1]:
# Load the protein alignment as a discrete character matrix #
data <- readDiscreteCharacterData("../Projet/fishes/aln_prot.fst")

## helpers
# Taxon list and tip count are taken straight from the alignment #
taxa <- data.taxa()
n_species <- data.ntaxa()
# 2n - 2 for n tips (not referenced again in the lines visible here) #
n_branchs <- 2 * n_species - 2

# Container that collects every MCMC move defined below #
moves = VectorMoves()

   Missing Variable:	Variable os does not exist

   Successfully read one character matrix from file '../Projet/fishes/aln_prot.fst'


## Joint Fossilized Birth-Death Process prior on the topology and fossil occurrence times

In [2]:
# Birth (speciation) and death (extinction) rates: exponential priors with rate 10 #
speciation_rate ~ dnExponential(lambda=10)
extinction_rate ~ dnExponential(lambda=10)

# One scale move per rate #
moves.append( mvScale(speciation_rate, weight=1) )
moves.append( mvScale(extinction_rate, weight=1) )

# Derived quantities, defined as deterministic nodes only so that they can be monitored #
turnover := extinction_rate/speciation_rate
diversification := speciation_rate - extinction_rate


# Sampling probability rho is held constant (not estimated) #
rho <- 0.000046

# Fossil sampling rate psi: exponential prior with rate 1, updated by a scale move #
psi ~ dnExponential(lambda=1)
moves.append( mvScale(psi, weight=1) )


# Origin time on which the FBD process is conditioned: uniform on (0, 1500) #
origin_time ~ dnUnif(0, 1500)

# Sliding-window move on the origin time #
moves.append( mvSlide(origin_time, weight=1.0) )


### Tree prior: the fossilized birth-death process over the taxa of the alignment ###
fbd_tree ~ dnFBDP(
    origin=origin_time,
    lambda=speciation_rate,
    mu=extinction_rate,
    psi=psi,
    rho=rho,
    taxa=taxa
)

# Node-age moves. The root move also receives origin_time as an argument, #
# because the process is conditioned on the origin rather than on the root #
moves.append( mvNodeTimeSlideUniform(fbd_tree, weight=40.0) )
moves.append( mvRootTimeSlideUniform(fbd_tree, origin_time, weight=5.0) )
moves.append( mvSubtreeScale(fbd_tree, weight=4) )

In [3]:
# Add a move to sample the fossil times #
# NOTE(review): `taxa` comes from the protein alignment, so presumably every tip is extant #
# and this move has no fossil to act on -- confirm, and drop it if that is the case #
moves.append( mvFossilTimeSlideUniform(fbd_tree, origin_time, weight=5.0) )

In [4]:
# Starting tree: take the first tree stored in the file and read it as a clock tree #
tree <- readTrees("../Projet/fishes/vertebrate_Poisson_NJ.tree", treetype="clock")[1]

# Adjust the terminal branch lengths so that the tree becomes ultrametric #
tree.makeUltrametric()

# Rescale the tree by 750 / (current root age), i.e. bring the root age to 750 #
tree.rescale( 750 / tree.rootAge() )

# Use this tree (topology and node ages) as the current value of the FBD tree #
# NOTE(review): origin_time starts from a draw on (0, 1500); a draw below 750 would sit #
# under this root age -- confirm the starting state is valid in that case #
fbd_tree.setValue(tree)

   Attempting to read the contents of file "vertebrate_Poisson_NJ.tree"
   Successfully read file


## Use node calibrations to constrain the ages of selected clades

In [5]:
# Node calibrations
#
# Each calibration follows the same recipe:
#   1. define the clade by two of its members,
#   2. track its age with a deterministic tmrca() node on fbd_tree,
#   3. attach a clamped "observation" node whose density depends on that age,
#      so that the calibration enters the model graph reached from fbd_tree.
#
# Two kinds of calibration are used:
#   * min + max bound: dnSoftBoundUniformNormal on (age - max, age - min),
#     clamped at 0. Zero falls inside the uniform part exactly when
#     min <= age <= max; outside it the normal tails (sd=2) apply.
#     The bounds must reference speciation_clade_k: with constant bounds the
#     observation node is disconnected from the tree and constrains nothing.
#   * min bound only: dnExponential(0.2) offset by -age, clamped at -min.
#     The observed value is then (age - min) above the offset, so the
#     density decays with rate 0.2 as the node gets older than its bound.
#
# NOTE(review): p=0.05 is kept from the original script -- confirm against the
# RevBayes documentation whether p is the mass inside or outside the bounds.

# --- calibrations with a minimum and a maximum bound ---

clade_1 <- clade("Cyprinus_carpio","Chimaera_monstrosa")
minimum_bound_clade_1 <- 422
maximum_bound_clade_1 <- 463
speciation_clade_1 := tmrca(fbd_tree, clade_1)
obs_age_clade_1 ~ dnSoftBoundUniformNormal(min=speciation_clade_1 - maximum_bound_clade_1, max=speciation_clade_1 - minimum_bound_clade_1, sd=2, p=0.05)
obs_age_clade_1.clamp( 0.0 )

clade_2 <- clade("Cyprinus_carpio","Homo_sapiens")
minimum_bound_clade_2 <- 416
maximum_bound_clade_2 <- 422
speciation_clade_2 := tmrca(fbd_tree, clade_2)
obs_age_clade_2 ~ dnSoftBoundUniformNormal(min=speciation_clade_2 - maximum_bound_clade_2, max=speciation_clade_2 - minimum_bound_clade_2, sd=2, p=0.05)
obs_age_clade_2.clamp( 0.0 )

# --- calibration with a minimum bound only ---

clade_3 <- clade("Cyprinus_carpio","Tetraodon_nigroviridis")
minimum_bound_3 <- 150
speciation_clade_3 := tmrca(fbd_tree, clade_3)
obs_age_clade_3 ~ dnExponential(0.2, offset = -speciation_clade_3)
obs_age_clade_3.clamp( -minimum_bound_3 )

# --- calibrations with a minimum and a maximum bound ---

clade_6 <- clade("Xenopus_tropicalis","Homo_sapiens")
minimum_bound_clade_6 <- 330
maximum_bound_clade_6 <- 350
speciation_clade_6 := tmrca(fbd_tree, clade_6)
obs_age_clade_6 ~ dnSoftBoundUniformNormal(min=speciation_clade_6 - maximum_bound_clade_6, max=speciation_clade_6 - minimum_bound_clade_6, sd=2, p=0.05)
obs_age_clade_6.clamp( 0.0 )

clade_7 <- clade("Struthio_camelus","Homo_sapiens")
minimum_bound_clade_7 <- 312
maximum_bound_clade_7 <- 330
speciation_clade_7 := tmrca(fbd_tree, clade_7)
obs_age_clade_7 ~ dnSoftBoundUniformNormal(min=speciation_clade_7 - maximum_bound_clade_7, max=speciation_clade_7 - minimum_bound_clade_7, sd=2, p=0.05)
obs_age_clade_7.clamp( 0.0 )

clade_8 <- clade("Ornithorhynchus_anatinus","Homo_sapiens")
minimum_bound_clade_8 <- 163
maximum_bound_clade_8 <- 191
speciation_clade_8 := tmrca(fbd_tree, clade_8)
obs_age_clade_8 ~ dnSoftBoundUniformNormal(min=speciation_clade_8 - maximum_bound_clade_8, max=speciation_clade_8 - minimum_bound_clade_8, sd=2, p=0.05)
obs_age_clade_8.clamp( 0.0 )

# --- calibrations with a minimum bound only ---

clade_9 <- clade("Plesiobatis_daviesi","Chimaera_monstrosa")
minimum_bound_clade_9 <- 410
speciation_clade_9 := tmrca(fbd_tree, clade_9)
obs_age_clade_9 ~ dnExponential(0.2, offset = -speciation_clade_9)
obs_age_clade_9.clamp( -minimum_bound_clade_9 )

clade_10 <- clade("Plesiobatis_daviesi","Mustelus_manazo")
minimum_bound_10 <- 190
speciation_clade_10 := tmrca(fbd_tree, clade_10)
obs_age_clade_10 ~ dnExponential(0.2, offset = -speciation_clade_10)
obs_age_clade_10.clamp( -minimum_bound_10 )

clade_11 <- clade("Plesiobatis_daviesi","Okamejei_kenojei")
minimum_bound_11 <- 176
speciation_clade_11 := tmrca(fbd_tree, clade_11)
obs_age_clade_11 ~ dnExponential(0.2, offset = -speciation_clade_11)
obs_age_clade_11.clamp( -minimum_bound_11 )

clade_14 <- clade("Heterodontus_francisci","Mustelus_manazo")
minimum_bound_14 <- 176
speciation_clade_14 := tmrca(fbd_tree, clade_14)
obs_age_clade_14 ~ dnExponential(0.2, offset = -speciation_clade_14)
obs_age_clade_14.clamp( -minimum_bound_14 )

clade_15 <- clade("Scyliorhinus_canicula","Mustelus_manazo")
minimum_bound_15 <- 165
speciation_clade_15 := tmrca(fbd_tree, clade_15)
obs_age_clade_15 ~ dnExponential(0.2, offset = -speciation_clade_15)
obs_age_clade_15.clamp( -minimum_bound_15 )

clade_16 <- clade("Callorhinchus_callorynchus","Chimaera_monstrosa")
minimum_bound_16 <- 161
speciation_clade_16 := tmrca(fbd_tree, clade_16)
obs_age_clade_16 ~ dnExponential(0.2, offset = -speciation_clade_16)
obs_age_clade_16.clamp( -minimum_bound_16 )

clade_19 <- clade("Rhinochimaera_pacifica","Chimaera_monstrosa")
minimum_bound_19 <- 84
speciation_clade_19 := tmrca(fbd_tree, clade_19)
obs_age_clade_19 ~ dnExponential(0.2, offset = -speciation_clade_19)
obs_age_clade_19.clamp( -minimum_bound_19 )

## Create deterministic nodes to monitor various tree statistics

In [6]:
# Deterministic node holding the number of sampled ancestors in the FBD tree, #
# defined so that the value can be passed to a monitor #
num_samp_anc := fbd_tree.numSampledAncestors()

## Amino-acid substitution model

In [7]:
# Rate matrix: the data are amino acids (type="AA" below), so we use the mtREV #
# matrix, which is built here without any free parameter #
Q <- fnMtRev()

# Strict clock: a single rate shared by all branches, with an exponential prior (rate 1) #
clockrate ~ dnExponential(lambda=1.0)

# Scale move on the clock rate #
moves.append( mvScale(clockrate, weight=4.0) )

### Phylogenetic CTMC on the FBD tree, clamped to the observed alignment ###
seq ~ dnPhyloCTMC(
    tree=fbd_tree,
    branchRates=clockrate,
    Q=Q,
    type="AA"
)
seq.clamp(data)

## MCMC

In [None]:
# Build the model object from the graph connected to the FBD tree #
mymodel = model(fbd_tree)

# Monitors #
monitors = VectorMonitors()

# 1. model parameters, written every 10 generations #
#    (no variable named "F" is defined in the lines visible here, so the exclusion looks like a leftover) #
monitors.append( mnModel(
    filename="../fbd_simple/output/fishes_FDB.log",
    printgen=10,
    exclude = ["F"]
) )

# 2. sampled trees, written every 10 generations #
monitors.append( mnFile(
    filename="../fbd_simple/output/fishes_FDB.trees",
    printgen=10,
    fbd_tree
) )

# 3. screen output every 100 generations: sampled-ancestor count and origin time #
monitors.append( mnScreen(printgen=100, num_samp_anc, origin_time) )

# Assemble the sampler from the model, the monitors and the moves #
mymcmc = mcmc(mymodel, monitors, moves)

# Run the chain for 500 generations #
mymcmc.run(generations=500)

   
   Running MCMC simulation
   This simulation runs 1 independent replicate.
   The simulator uses 9 different moves in a random move schedule with 62 moves per iteration
   

Iter        |      Posterior   |     Likelihood   |          Prior   |   num_samp_anc   |    origin_time   |    elapsed   |        ETA   |
------------------------------------------------------------------------------------------------------------------------------------------
0           |        -245016   |        -208234   |       -36782.4   |              0   |       1128.737   |   00:00:00   |   --:--:--   |

In [9]:
# Read back the sampled trees as clock trees, with burnin=0.1 #
treetrace = readTreeTrace(
    "../fbd_simple/output/fishes_FDB.trees",
    treetype="clock",
    burnin=0.1
)

# Summarise the trace with mccTree() and write the summary tree to file #
# (despite its name, map_tree holds the result of mccTree, not of a MAP summary) #
map_tree = mccTree(treetrace, file="../fbd_simple/output/fishes_FDB.tree")

   Processing file "/home/tommaso/M2/Semestre_3/Phylo/repo_git/../fbd_simple/output/fishes_FDB.trees"

Progress:
0---------------25---------------50---------------75--------------100
********************************************************************

   Compiling maximum clade credibility tree from 46 trees.
   
   Summarizing clades ...
   

Progress:
0---------------25---------------50---------------75--------------100
********************************************************************

   Annotating tree ...
