In [1]:
import ipyrad.analysis as ipa
import ipyparallel as ipp
import numpy as np
import pandas as pd
import toyplot
import toytree

In [2]:
ipa.__version__

'0.9.62'

In [3]:
ipyclient = ipp.Client()
ipyclient.ids

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79]

In [4]:
# Set loci location.
loci = "/home/henry/phylo-timescale/alignments/model_chp_small_diploid_loci.seqs.hdf5"

# Map samples to species names.
imap = {
    "r0" : ["r0"],
    "r1" : ["r1"],
    "r2" : ["r2"],
    "r3" : ["r3"],
    "r4" : ["r4"],
    "r5" : ["r5"],
    "r6" : ["r6"],
    "r7" : ["r7"],
    "r8" : ["r8"],
    "r9" : ["r9"],
    "r10" : ["r10"],
    "r11" : ["r11"],
    "r12" : ["r12"],
    "r13" : ["r13"],
    "r14" : ["r14"],
    "r15" : ["r15"],
    "r16" : ["r16"],
    "r17" : ["r17"],
    "r18" : ["r18"],
    "r19" : ["r19"],
    "r20" : ["r20"],
    "r21" : ["r21"],
    "r22" : ["r22"],
    "r23" : ["r23"],
    "r24" : ["r24"],
    "r25" : ["r25"],
    "r26" : ["r26"],
    "r27" : ["r27"],
    "r28" : ["r28"],
    "r29" : ["r29"],
    "r30" : ["r30"],
    "r31" : ["r31"],
    "r32" : ["r32"],
    "r33" : ["r33"],
    "r34" : ["r34"],
    "r35" : ["r35"],
    "r36" : ["r36"],
    "r37" : ["r37"],
    "r38" : ["r38"],
    "r39" : ["r39"],
    "r40" : ["r40"],
    "r41" : ["r41"],
    "r42" : ["r42"],
    "r43" : ["r43"],
    "r44" : ["r44"],
    "r45" : ["r45"],
    "r46" : ["r46"],
    "r47" : ["r47"],
    "r48" : ["r48"],
    "r49" : ["r49"],
}

In [5]:
# True species tree.
tree = "/home/henry/phylo-timescale/newick/realtree.tre"
truetree = toytree.tree(tree)
truetree.draw(layout='d', use_edge_lengths=True, node_labels="idx", node_sizes=12);

In [36]:
# Set up bpp object.
bpp_chp = ipa.bpp(
    name="bpp_chp_small",
    data=loci,
    guidetree=truetree.drop_tips([i for i in truetree.get_tip_labels() if int(i[1:]) >5]), 
    imap={"r{}".format(i) : ["r{}".format(i)] for i in range(6)},
    maxloci=500,
    workdir="/home/henry/phylo-timescale/bpp",
    reps_resample_loci=False,
    )

In [37]:
# Set parameters.
bpp_chp.kwargs['thetaprior'] = (3.5, 0.015)
bpp_chp.kwargs['tauprior'] = (8, 0.009)
bpp_chp.kwargs['sampfreq'] = 3
bpp_chp.kwargs['burnin'] = 100000
bpp_chp.kwargs['nsample'] = 1000000

In [38]:
# Draw priors.
bpp_chp.draw_priors(
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,
);

In [None]:
bpp_chp.ipcluster['cores'] = 8
bpp_chp.run(nreps=4, auto=True, force=True)

Parallel connection | pinky: 8 cores
[locus filter] full data: 1
[locus filter] post filter: 1
[ipa bpp] bpp v4.1.4
[ipa.bpp] distributed 4 bpp jobs (name=bpp_chp_small, nloci=1)
[####################] 100% 0:05:21 | progress on all reps 


In [40]:
res, mcmc = bpp_chp.summarize_results("00", individual_results = False)

[ipa.bpp] found 4 existing result files
[ipa.bpp] summarizing algorithm '00' results
[ipa.bpp] combining mcmc files


In [41]:
dfdiv, dfne, ttre, mtre = bpp_chp.transform(mcmc,
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,)

In [42]:
dfdiv

Unnamed: 0,6,7,8,9,10
mean,29400.0,55800.0,77900.0,96200.0,111000.0
median,21600.0,48500.0,71400.0,90300.0,106000.0
std,27200.0,35000.0,38900.0,40800.0,41900.0
min,0.043,60.9,770.0,3780.0,5730.0
max,420000.0,440000.0,477000.0,481000.0,501000.0
2.5%,1240.0,9700.0,21800.0,34200.0,45800.0
97.5%,102000.0,143000.0,171000.0,192000.0,208000.0


In [43]:
dfne

Unnamed: 0,6,7,8,9,10
mean,155000.0,163000.0,172000.0,180000.0,4250000.0
median,122000.0,129000.0,135000.0,139000.0,4030000.0
std,128000.0,133000.0,142000.0,152000.0,1670000.0
min,2.47,1.47,2.64,1.97,222000.0
max,1740000.0,1900000.0,1930000.0,2050000.0,20000000.0
2.5%,10700.0,12000.0,11900.0,11200.0,1630000.0
97.5%,487000.0,507000.0,540000.0,577000.0,8090000.0


In [44]:
bpp_chp.draw_posteriors(mcmc,    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f8e4fa0f450>,
 <toyplot.canvas.Canvas at 0x7f8ee82515d0>)

In [47]:
c, a = bpp_chp.draw_posterior_tree(mcmc,    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,
    node_dists=[10, 9, 8, 7, 6])

Old stuff
=====

In [24]:
dfdiv0, dfne0, ttre0, mtre0 = bpp_chp.transform(mcmc[0],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [25]:
dfdiv0

Unnamed: 0,6,7,8,9,10
mean,43600.0,82400.0,115000.0,141000.0,163000.0
median,31500.0,70600.0,103000.0,131000.0,153000.0
std,41300.0,54100.0,61200.0,65300.0,68000.0
min,0.344,167.0,1550.0,6550.0,10300.0
max,685000.0,769000.0,796000.0,775000.0,890000.0
2.5%,1760.0,13500.0,29900.0,46700.0,62000.0
97.5%,154000.0,219000.0,264000.0,298000.0,324000.0


In [26]:
dfne0

Unnamed: 0,6,7,8,9,10
mean,227000.0,239000.0,250000.0,263000.0,6190000.0
median,177000.0,186000.0,194000.0,203000.0,5810000.0
std,187000.0,200000.0,211000.0,223000.0,2590000.0
min,46.7,31.4,47.0,33.9,441000.0
max,2740000.0,3150000.0,3000000.0,3370000.0,28200000.0
2.5%,16800.0,16900.0,17200.0,17500.0,2300000.0
97.5%,714000.0,761000.0,801000.0,845000.0,12300000.0


In [13]:
bpp_chp.draw_posteriors(mcmc[0],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f8ef9817610>,
 <toyplot.canvas.Canvas at 0x7f8c690ebe90>)

In [14]:
c, a, m = bpp_chp.draw_posterior_tree(mcmc[0],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,
    node_dists=[10, 9, 8, 7, 6])

ValueError: not enough values to unpack (expected 3, got 2)

In [27]:
dfdiv1, dfne1, ttre1, mtre1 = bpp_chp.transform(mcmc[1],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [28]:
dfdiv1

Unnamed: 0,6,7,8,9,10
mean,42900.0,81100.0,113000.0,140000.0,162000.0
median,30600.0,69100.0,102000.0,129000.0,151000.0
std,41400.0,54200.0,61100.0,65500.0,68300.0
min,0.447,113.0,1230.0,6700.0,9490.0
max,699000.0,709000.0,776000.0,844000.0,883000.0
2.5%,1570.0,12700.0,28900.0,45300.0,60500.0
97.5%,154000.0,218000.0,263000.0,297000.0,324000.0


In [29]:
dfne1

Unnamed: 0,6,7,8,9,10
mean,226000.0,239000.0,251000.0,262000.0,6220000.0
median,178000.0,184000.0,193000.0,202000.0,5820000.0
std,185000.0,204000.0,215000.0,222000.0,2660000.0
min,2.31,15.9,11.2,31.1,374000.0
max,2440000.0,2720000.0,3060000.0,2960000.0,33200000.0
2.5%,17400.0,15400.0,16000.0,17300.0,2240000.0
97.5%,706000.0,772000.0,815000.0,842000.0,12500000.0


In [16]:
bpp_chp.draw_posteriors(mcmc[1],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f8ee8adee50>,
 <toyplot.canvas.Canvas at 0x7f8ee8a684d0>)

In [17]:
c, a, m = bpp_chp.draw_posterior_tree(mcmc[1],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,
    node_dists=[10, 9, 8, 7, 6])

ValueError: not enough values to unpack (expected 3, got 2)

In [30]:
dfdiv2, dfne2, ttre2, mtre2 = bpp_chp.transform(mcmc[2],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [31]:
dfdiv2

Unnamed: 0,6,7,8,9,10
mean,42800.0,81100.0,113000.0,140000.0,161000.0
median,31200.0,69900.0,103000.0,130000.0,151000.0
std,40100.0,52100.0,58500.0,62200.0,64800.0
min,1.33,237.0,1860.0,7560.0,12100.0
max,730000.0,748000.0,800000.0,721000.0,805000.0
2.5%,1850.0,14100.0,31200.0,48300.0,63800.0
97.5%,150000.0,212000.0,255000.0,288000.0,315000.0


In [32]:
dfne2

Unnamed: 0,6,7,8,9,10
mean,227000.0,238000.0,251000.0,262000.0,6180000.0
median,171000.0,187000.0,192000.0,198000.0,5780000.0
std,200000.0,196000.0,216000.0,232000.0,2620000.0
min,4.86,64.8,8.22,5.09,323000.0
max,2600000.0,2920000.0,3430000.0,3310000.0,32700000.0
2.5%,12500.0,17900.0,15700.0,14300.0,2250000.0
97.5%,755000.0,749000.0,816000.0,873000.0,12400000.0


In [19]:
bpp_chp.draw_posteriors(mcmc[2],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f8eb5bab610>,
 <toyplot.canvas.Canvas at 0x7f8ec6e4c810>)

In [20]:
c, a, m = bpp_chp.draw_posterior_tree(mcmc[2],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,
    node_dists=[10, 9, 8, 7, 6])

ValueError: not enough values to unpack (expected 3, got 2)

In [33]:
dfdiv3, dfne3, ttre3, mtre3 = bpp_chp.transform(mcmc[3],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [34]:
dfdiv3

Unnamed: 0,6,7,8,9,10
mean,42900.0,81400.0,114000.0,141000.0,163000.0
median,31100.0,70100.0,103000.0,131000.0,153000.0
std,40400.0,52800.0,59500.0,63400.0,66000.0
min,0.634,154.0,2000.0,7710.0,13200.0
max,653000.0,711000.0,811000.0,785000.0,815000.0
2.5%,1780.0,13800.0,31000.0,48300.0,64200.0
97.5%,150000.0,213000.0,259000.0,293000.0,319000.0


In [35]:
dfne3

Unnamed: 0,6,7,8,9,10
mean,226000.0,238000.0,251000.0,264000.0,6190000.0
median,173000.0,184000.0,193000.0,201000.0,5800000.0
std,196000.0,202000.0,215000.0,228000.0,2600000.0
min,20.1,28.3,11.7,23.3,255000.0
max,2930000.0,2790000.0,3080000.0,3080000.0,32300000.0
2.5%,13700.0,16000.0,16000.0,16000.0,2280000.0
97.5%,740000.0,767000.0,815000.0,862000.0,12300000.0


In [22]:
bpp_chp.draw_posteriors(mcmc[3],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f8ea486bf90>,
 <toyplot.canvas.Canvas at 0x7f8eb5a22b90>)

In [23]:
c, a, m = bpp_chp.draw_posterior_tree(mcmc[3],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,
    node_dists=[10, 9, 8, 7, 6])

ValueError: not enough values to unpack (expected 3, got 2)