In [1]:
import ipyrad.analysis as ipa
import ipyparallel as ipp
import numpy as np
import pandas as pd
import toyplot
import toytree

In [2]:
ipa.__version__

'0.9.62'

In [3]:
ipyclient = ipp.Client()
ipyclient.ids

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79]

In [4]:
# Set loci location.
loci = "/home/henry/phylo-timescale/alignments/model_g_small_diploid_loci.seqs.hdf5"

# Map samples to species names.
imap = {
    "r0" : ["r0"],
    "r1" : ["r1"],
    "r2" : ["r2"],
    "r3" : ["r3"],
    "r4" : ["r4"],
    "r5" : ["r5"],
    "r6" : ["r6"],
    "r7" : ["r7"],
    "r8" : ["r8"],
    "r9" : ["r9"],
    "r10" : ["r10"],
    "r11" : ["r11"],
    "r12" : ["r12"],
    "r13" : ["r13"],
    "r14" : ["r14"],
    "r15" : ["r15"],
    "r16" : ["r16"],
    "r17" : ["r17"],
    "r18" : ["r18"],
    "r19" : ["r19"],
    "r20" : ["r20"],
    "r21" : ["r21"],
    "r22" : ["r22"],
    "r23" : ["r23"],
    "r24" : ["r24"],
    "r25" : ["r25"],
    "r26" : ["r26"],
    "r27" : ["r27"],
    "r28" : ["r28"],
    "r29" : ["r29"],
    "r30" : ["r30"],
    "r31" : ["r31"],
    "r32" : ["r32"],
    "r33" : ["r33"],
    "r34" : ["r34"],
    "r35" : ["r35"],
    "r36" : ["r36"],
    "r37" : ["r37"],
    "r38" : ["r38"],
    "r39" : ["r39"],
    "r40" : ["r40"],
    "r41" : ["r41"],
    "r42" : ["r42"],
    "r43" : ["r43"],
    "r44" : ["r44"],
    "r45" : ["r45"],
    "r46" : ["r46"],
    "r47" : ["r47"],
    "r48" : ["r48"],
    "r49" : ["r49"],
}

In [5]:
# True species tree.
tree = "/home/henry/phylo-timescale/newick/realtree.tre"
truetree = toytree.tree(tree)
truetree.draw(layout='d', use_edge_lengths=True, node_labels="idx", node_sizes=12);

In [41]:
# Set up bpp object.
bpp_g = ipa.bpp(
    name="bpp_g_small",
    data=loci,
    guidetree=truetree.drop_tips([i for i in truetree.get_tip_labels() if int(i[1:]) >5]), 
    imap={"r{}".format(i) : ["r{}".format(i)] for i in range(6)},
    maxloci=500,
    workdir="/home/henry/phylo-timescale/bpp",
    reps_resample_loci=False,
    )

In [42]:
# Set parameters.
bpp_g.kwargs['thetaprior'] = (3.5, 0.015)
bpp_g.kwargs['tauprior'] = (8, 0.009)
bpp_g.kwargs['sampfreq'] = 3
bpp_g.kwargs['burnin'] = 100000
bpp_g.kwargs['nsample'] = 1000000

In [43]:
# Draw priors.
bpp_g.draw_priors(
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,
);

In [None]:
bpp_g.ipcluster['cores'] = 8
bpp_g.run(nreps=4, auto=True, force=True)

Parallel connection | pinky: 8 cores
[locus filter] full data: 2000
[locus filter] post filter: 2000
[ipa bpp] bpp v4.1.4
[ipa.bpp] distributed 4 bpp jobs (name=bpp_g_small, nloci=500)
[#                   ]   5% 4:03:58 | progress on rep 0 

In [46]:
res, mcmc = bpp_g.summarize_results("00", individual_results = False)

[ipa.bpp] found 4 existing result files
[ipa.bpp] summarizing algorithm '00' results
[ipa.bpp] combining mcmc files


In [47]:
dfdiv, dfne, ttre, mtre = bpp_g.transform(mcmc,
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,)

In [48]:
dfdiv

Unnamed: 0,6,7,8,9,10
mean,3130000.0,4820000.0,6370000.0,7630000.0,8940000.0
median,3090000.0,4770000.0,6230000.0,7390000.0,8580000.0
std,586000.0,697000.0,1530000.0,2260000.0,3020000.0
min,1080000.0,1740000.0,1540000.0,1060000.0,812000.0
max,8110000.0,10500000.0,18100000.0,25400000.0,36200000.0
2.5%,2110000.0,3570000.0,3780000.0,3920000.0,4100000.0
97.5%,4400000.0,6300000.0,9750000.0,12700000.0,15800000.0


In [49]:
dfne

Unnamed: 0,6,7,8,9,10
mean,792000.0,694000.0,584000.0,6320000.0,1780000.0
median,707000.0,643000.0,544000.0,5.8e-221,1170000.0
std,456000.0,329000.0,264000.0,175000000.0,1910000.0
min,3670.0,10600.0,13400.0,0.0,0.0314
max,5370000.0,3620000.0,2870000.0,33500000000.0,34600000.0
2.5%,165000.0,207000.0,187000.0,0.0,28500.0
97.5%,1900000.0,1470000.0,1200000.0,10.4,6970000.0


In [51]:
bpp_g.draw_posteriors(mcmc,    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f86dde8af10>,
 <toyplot.canvas.Canvas at 0x7f851852d450>)

In [53]:
c, a = bpp_g.draw_posterior_tree(mcmc,    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,
    node_dists=[10, 9, 8, 7, 6])

Old stuff
=====

In [27]:
dfdiv0, dfne0, ttre0, mtre0 = bpp_g.transform(mcmc[0],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [28]:
dfdiv0

Unnamed: 0,6,7,8,9,10
mean,4310000.0,7300000.0,10300000.0,13000000.0,15200000.0
median,4240000.0,7180000.0,10100000.0,12700000.0,15000000.0
std,769000.0,1290000.0,1830000.0,2290000.0,2700000.0
min,1810000.0,3050000.0,4350000.0,5500000.0,6440000.0
max,9810000.0,16500000.0,23600000.0,29200000.0,34300000.0
2.5%,3020000.0,5130000.0,7190000.0,9100000.0,10700000.0
97.5%,6030000.0,10200000.0,14300000.0,18100000.0,21200000.0


In [29]:
dfne0

Unnamed: 0,6,7,8,9,10
mean,1360000.0,1340000.0,1200000.0,1310000.0,1100000.0
median,1340000.0,1320000.0,1170000.0,1290000.0,1090000.0
std,229000.0,223000.0,228000.0,230000.0,174000.0
min,587000.0,597000.0,471000.0,572000.0,550000.0
max,3080000.0,3160000.0,3120000.0,3070000.0,2350000.0
2.5%,976000.0,963000.0,814000.0,922000.0,812000.0
97.5%,1870000.0,1840000.0,1700000.0,1820000.0,1490000.0


In [40]:
bpp_g.draw_posteriors(mcmc[0],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f86ddfc3e10>,
 <toyplot.canvas.Canvas at 0x7f86f72e77d0>)

In [39]:
c, a = bpp_g.draw_posterior_tree(mcmc[0],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,
    node_dists=[10, 9, 8, 7, 6])

In [30]:
dfdiv1, dfne1, ttre1, mtre1 = bpp_g.transform(mcmc[1],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [31]:
dfdiv1

Unnamed: 0,6,7,8,9,10
mean,6130000.0,6160000.0,6160000.0,6160000.0,6160000.0
median,6030000.0,6060000.0,6060000.0,6060000.0,6060000.0
std,1080000.0,1090000.0,1090000.0,1090000.0,1090000.0
min,2600000.0,2610000.0,2610000.0,2610000.0,2610000.0
max,13800000.0,13900000.0,13900000.0,13900000.0,13900000.0
2.5%,4310000.0,4330000.0,4330000.0,4330000.0,4330000.0
97.5%,8540000.0,8590000.0,8590000.0,8590000.0,8590000.0


In [32]:
dfne1

Unnamed: 0,6,7,8,9,10
mean,31400.0,172000.0,233000.0,259000.0,7430000.0
median,31000.0,132000.0,180000.0,194000.0,7330000.0
std,4590.0,147000.0,198000.0,232000.0,1100000.0
min,16500.0,10.7,24.7,3.29,3840000.0
max,66300.0,2090000.0,2940000.0,3540000.0,16000000.0
2.5%,23700.0,11100.0,15500.0,13400.0,5580000.0
97.5%,41600.0,558000.0,750000.0,870000.0,9870000.0


In [16]:
bpp_g.draw_posteriors(mcmc[1],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f8790355890>,
 <toyplot.canvas.Canvas at 0x7f878fb7aad0>)

In [22]:
c, a, m = bpp_g.draw_posterior_tree(mcmc[1],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,
    node_dists=[10, 9, 8, 7, 6])

ValueError: not enough values to unpack (expected 3, got 2)

In [33]:
dfdiv2, dfne2, ttre2, mtre2 = bpp_g.transform(mcmc[2],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [34]:
dfdiv2

Unnamed: 0,6,7,8,9,10
mean,4240000.0,7190000.0,10400000.0,13000000.0,15500000.0
median,4160000.0,7070000.0,10200000.0,12800000.0,15200000.0
std,756000.0,1270000.0,1840000.0,2300000.0,2740000.0
min,1780000.0,3060000.0,4410000.0,5500000.0,6540000.0
max,9640000.0,16200000.0,23400000.0,29300000.0,34800000.0
2.5%,2960000.0,5050000.0,7300000.0,9130000.0,10900000.0
97.5%,5920000.0,10000000.0,14500000.0,18100000.0,21600000.0


In [35]:
dfne2

Unnamed: 0,6,7,8,9,10
mean,1420000.0,1430000.0,1160000.0,1400000.0,917000.0
median,1390000.0,1410000.0,1140000.0,1380000.0,903000.0
std,238000.0,228000.0,190000.0,236000.0,145000.0
min,611000.0,682000.0,550000.0,605000.0,443000.0
max,3210000.0,3120000.0,2620000.0,3180000.0,2080000.0
2.5%,1020000.0,1040000.0,838000.0,1010000.0,673000.0
97.5%,1950000.0,1940000.0,1580000.0,1930000.0,1240000.0


In [19]:
bpp_g.draw_posteriors(mcmc[2],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f875d5b1f50>,
 <toyplot.canvas.Canvas at 0x7f875d515c50>)

In [20]:
c, a, m = bpp_g.draw_posterior_tree(mcmc[2],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,
    node_dists=[10, 9, 8, 7, 6])

ValueError: not enough values to unpack (expected 3, got 2)

In [36]:
dfdiv3, dfne3, ttre3, mtre3 = bpp_g.transform(mcmc[3],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [37]:
dfdiv3

Unnamed: 0,6,7,8,9,10
mean,4390000.0,7520000.0,11400000.0,11400000.0,11400000.0
median,4320000.0,7390000.0,11200000.0,11200000.0,11200000.0
std,787000.0,1330000.0,2030000.0,2030000.0,2030000.0
min,1850000.0,3200000.0,4570000.0,4570000.0,4570000.0
max,10000000.0,17000000.0,25900000.0,25900000.0,25900000.0
2.5%,3070000.0,5280000.0,8020000.0,8020000.0,8020000.0
97.5%,6150000.0,10500000.0,15900000.0,15900000.0,15900000.0


In [38]:
dfne3

Unnamed: 0,6,7,8,9,10
mean,1340000.0,1320000.0,23600.0,212000.0,2570000.0
median,1320000.0,1300000.0,21000.0,167000.0,2530000.0
std,227000.0,208000.0,13500.0,175000.0,411000.0
min,619000.0,663000.0,98.0,60.2,1250000.0
max,3050000.0,2920000.0,172000.0,2560000.0,5590000.0
2.5%,956000.0,971000.0,5210.0,15900.0,1880000.0
97.5%,1840000.0,1790000.0,56800.0,667000.0,3490000.0


In [24]:
bpp_g.draw_posteriors(mcmc[3],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f877f512810>,
 <toyplot.canvas.Canvas at 0x7f874c0a6250>)

In [25]:
c, a, m = bpp_g.draw_posterior_tree(mcmc[3],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,
    node_dists=[10, 9, 8, 7, 6])

ValueError: not enough values to unpack (expected 3, got 2)