In [1]:
import ipyrad.analysis as ipa
import ipyparallel as ipp
import numpy as np
import pandas as pd
import toyplot
import toytree

In [2]:
ipa.__version__

'0.9.62'

In [3]:
ipyclient = ipp.Client()
ipyclient.ids

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79]

In [4]:
# Set loci location.
loci = "/home/henry/phylo-timescale/alignments/model_both_small_diploid_loci.seqs.hdf5"

# Map samples to species names.
imap = {
    "r0" : ["r0"],
    "r1" : ["r1"],
    "r2" : ["r2"],
    "r3" : ["r3"],
    "r4" : ["r4"],
    "r5" : ["r5"],
    "r6" : ["r6"],
    "r7" : ["r7"],
    "r8" : ["r8"],
    "r9" : ["r9"],
    "r10" : ["r10"],
    "r11" : ["r11"],
    "r12" : ["r12"],
    "r13" : ["r13"],
    "r14" : ["r14"],
    "r15" : ["r15"],
    "r16" : ["r16"],
    "r17" : ["r17"],
    "r18" : ["r18"],
    "r19" : ["r19"],
    "r20" : ["r20"],
    "r21" : ["r21"],
    "r22" : ["r22"],
    "r23" : ["r23"],
    "r24" : ["r24"],
    "r25" : ["r25"],
    "r26" : ["r26"],
    "r27" : ["r27"],
    "r28" : ["r28"],
    "r29" : ["r29"],
    "r30" : ["r30"],
    "r31" : ["r31"],
    "r32" : ["r32"],
    "r33" : ["r33"],
    "r34" : ["r34"],
    "r35" : ["r35"],
    "r36" : ["r36"],
    "r37" : ["r37"],
    "r38" : ["r38"],
    "r39" : ["r39"],
    "r40" : ["r40"],
    "r41" : ["r41"],
    "r42" : ["r42"],
    "r43" : ["r43"],
    "r44" : ["r44"],
    "r45" : ["r45"],
    "r46" : ["r46"],
    "r47" : ["r47"],
    "r48" : ["r48"],
    "r49" : ["r49"],
}

In [5]:
# True species tree.
tree = "/home/henry/phylo-timescale/newick/realtree.tre"
truetree = toytree.tree(tree)
truetree.draw(layout='d', use_edge_lengths=True, node_labels="idx", node_sizes=12);

In [35]:
# Set up bpp object.
bpp_both = ipa.bpp(
    name="bpp_both_small",
    data=loci,
    guidetree=truetree.drop_tips([i for i in truetree.get_tip_labels() if int(i[1:]) >5]), 
    imap={"r{}".format(i) : ["r{}".format(i)] for i in range(6)},
    maxloci=500,
    workdir="/home/henry/phylo-timescale/bpp",
    reps_resample_loci=False,
    )

In [36]:
# Set parameters.
bpp_both.kwargs['thetaprior'] = (3.5, 0.015)
bpp_both.kwargs['tauprior'] = (8, 0.009)
bpp_both.kwargs['sampfreq'] = 3
bpp_both.kwargs['burnin'] = 100000
bpp_both.kwargs['nsample'] = 1000000

In [37]:
# Draw priors.
bpp_both.draw_priors(
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,
);

In [None]:
bpp_both.ipcluster['cores'] = 8
bpp_both.run(nreps=4, auto=True, force=True)

Parallel connection | pinky: 8 cores
[locus filter] full data: 2000
[locus filter] post filter: 2000
[ipa bpp] bpp v4.1.4
[ipa.bpp] distributed 4 bpp jobs (name=bpp_both_small, nloci=500)
[#                   ]   5% 5:22:43 | progress on rep 0 

In [39]:
res, mcmc = bpp_both.summarize_results("00", individual_results = False)

[ipa.bpp] found 4 existing result files
[ipa.bpp] summarizing algorithm '00' results
[ipa.bpp] combining mcmc files


In [40]:
dfdiv, dfne, ttre, mtre = bpp_both.transform(mcmc,
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,)

In [41]:
dfdiv

Unnamed: 0,6,7,8,9,10
mean,5520000.0,6720000.0,7320000.0,7600000.0,8200000.0
median,5440000.0,6660000.0,7240000.0,7480000.0,7940000.0
std,1080000.0,1030000.0,1190000.0,1520000.0,2420000.0
min,1840000.0,2850000.0,2860000.0,2450000.0,1060000.0
max,14700000.0,15300000.0,17200000.0,20600000.0,28100000.0
2.5%,3650000.0,4900000.0,5220000.0,4970000.0,4230000.0
97.5%,7850000.0,8920000.0,9870000.0,10900000.0,13600000.0


In [42]:
dfne

Unnamed: 0,6,7,8,9,10
mean,930000.0,728000.0,786000.0,3470000.0,4450000.0
median,648000.0,247000.0,370000.0,0.0,4260000.0
std,924000.0,1180000.0,1090000.0,125000000.0,1600000.0
min,0.107,1.67e-12,1.08e-07,0.0,267000.0
max,13500000.0,25100000.0,19900000.0,31900000000.0,18900000.0
2.5%,24600.0,81.4,988.0,0.0,1890000.0
97.5%,3410000.0,4160000.0,3880000.0,4.87e-05,8100000.0


In [43]:
bpp_both.draw_posteriors(mcmc,    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f190666ba10>,
 <toyplot.canvas.Canvas at 0x7f197dd46590>)

In [45]:
c, a = bpp_both.draw_posterior_tree(mcmc,    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,
    node_dists=[10, 9, 8, 7, 6])

Old stuff
=====

In [23]:
dfdiv0, dfne0, ttre0, mtre0 = bpp_both.transform(mcmc[0],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [24]:
dfdiv0

Unnamed: 0,6,7,8,9,10
mean,6480000.0,9030000.0,13200000.0,13500000.0,17200000.0
median,6370000.0,8880000.0,13000000.0,13300000.0,16900000.0
std,1170000.0,1630000.0,2380000.0,2440000.0,3070000.0
min,2880000.0,3520000.0,5560000.0,5890000.0,7240000.0
max,15000000.0,21400000.0,31400000.0,30900000.0,39200000.0
2.5%,4520000.0,6300000.0,9200000.0,9440000.0,12100000.0
97.5%,9090000.0,12700000.0,18500000.0,19000000.0,24100000.0


In [25]:
dfne0

Unnamed: 0,6,7,8,9,10
mean,3390000.0,3950000.0,1030000.0,68100000.0,2710000.0
median,3310000.0,3880000.0,664000.0,1.15e-16,2670000.0
std,750000.0,740000.0,1120000.0,626000000.0,447000.0
min,1070000.0,1430000.0,0.0562,0.0,1270000.0
max,9300000.0,10400000.0,19400000.0,44200000000.0,6230000.0
2.5%,2140000.0,2650000.0,16200.0,8.11e-127,1960000.0
97.5%,5080000.0,5690000.0,4070000.0,411000000.0,3710000.0


In [12]:
bpp_both.draw_posteriors(mcmc[0],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f199f144c50>,
 <toyplot.canvas.Canvas at 0x7f199f0e1e50>)

In [13]:
c, a, m = bpp_both.draw_posterior_tree(mcmc[0],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,
    node_dists=[10, 9, 8, 7, 6])

ValueError: not enough values to unpack (expected 3, got 2)

In [26]:
dfdiv1, dfne1, ttre1, mtre1 = bpp_both.transform(mcmc[1],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [27]:
dfdiv1

Unnamed: 0,6,7,8,9,10
mean,9600000.0,9600000.0,9600000.0,9600000.0,9600000.0
median,9430000.0,9440000.0,9440000.0,9440000.0,9440000.0
std,1700000.0,1700000.0,1700000.0,1700000.0,1700000.0
min,4060000.0,4060000.0,4060000.0,4060000.0,4060000.0
max,21600000.0,21600000.0,21600000.0,21600000.0,21600000.0
2.5%,6740000.0,6740000.0,6750000.0,6750000.0,6750000.0
97.5%,13400000.0,13400000.0,13400000.0,13400000.0,13400000.0


In [28]:
dfne1

Unnamed: 0,6,7,8,9,10
mean,23100.0,19400.0,220000.0,230000.0,8630000.0
median,22800.0,19100.0,174000.0,182000.0,8510000.0
std,3400.0,3050.0,178000.0,185000.0,1280000.0
min,12300.0,9750.0,10.7,11.3,4460000.0
max,49400.0,40900.0,2340000.0,2460000.0,18500000.0
2.5%,17400.0,14300.0,17800.0,18700.0,6480000.0
97.5%,30700.0,26200.0,682000.0,711000.0,11500000.0


In [15]:
bpp_both.draw_posteriors(mcmc[1],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f198e3dd650>,
 <toyplot.canvas.Canvas at 0x7f198e35b9d0>)

In [16]:
c, a, m = bpp_both.draw_posterior_tree(mcmc[1],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,
    node_dists=[10, 9, 8, 7, 6])

ValueError: not enough values to unpack (expected 3, got 2)

In [29]:
dfdiv2, dfne2, ttre2, mtre2 = bpp_both.transform(mcmc[2],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [30]:
dfdiv2

Unnamed: 0,6,7,8,9,10
mean,7040000.0,11700000.0,11700000.0,11700000.0,11700000.0
median,6920000.0,11500000.0,11500000.0,11500000.0,11500000.0
std,1250000.0,2070000.0,2070000.0,2070000.0,2070000.0
min,2930000.0,4950000.0,4960000.0,4960000.0,4960000.0
max,16100000.0,26400000.0,26400000.0,26400000.0,26400000.0
2.5%,4930000.0,8240000.0,8240000.0,8240000.0,8240000.0
97.5%,9840000.0,16300000.0,16300000.0,16300000.0,16300000.0


In [31]:
dfne2

Unnamed: 0,6,7,8,9,10
mean,2790000.0,22900.0,209000.0,216000.0,6260000.0
median,2740000.0,22500.0,164000.0,165000.0,6170000.0
std,466000.0,3360.0,173000.0,187000.0,929000.0
min,1340000.0,12000.0,40.5,4.69,3230000.0
max,6280000.0,48800.0,2740000.0,2770000.0,13500000.0
2.5%,2000000.0,17200.0,15500.0,13300.0,4700000.0
97.5%,3830000.0,30300.0,658000.0,706000.0,8320000.0


In [18]:
bpp_both.draw_posteriors(mcmc[2],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f19535b66d0>,
 <toyplot.canvas.Canvas at 0x7f19647d7ed0>)

In [19]:
c, a, m = bpp_both.draw_posterior_tree(mcmc[2],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,
    node_dists=[10, 9, 8, 7, 6])

ValueError: not enough values to unpack (expected 3, got 2)

In [32]:
dfdiv3, dfne3, ttre3, mtre3 = bpp_both.transform(mcmc[3],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [33]:
dfdiv3

Unnamed: 0,6,7,8,9,10
mean,6360000.0,8600000.0,13100000.0,13100000.0,17400000.0
median,6250000.0,8460000.0,12800000.0,12800000.0,17100000.0
std,1150000.0,1540000.0,2310000.0,2320000.0,3080000.0
min,2780000.0,3620000.0,5460000.0,5460000.0,7270000.0
max,14500000.0,19600000.0,29500000.0,29500000.0,39300000.0
2.5%,4440000.0,6020000.0,9160000.0,9170000.0,12200000.0
97.5%,8920000.0,12000000.0,18200000.0,18200000.0,24200000.0


In [34]:
dfne3

Unnamed: 0,6,7,8,9,10
mean,3110000.0,4380000.0,31900.0,10300000.0,2790000.0
median,3040000.0,4300000.0,31400.0,3.66e-53,2750000.0
std,692000.0,773000.0,4670.0,150000000.0,429000.0
min,925000.0,2060000.0,16800.0,0.0,1450000.0
max,9050000.0,10600000.0,67700.0,21200000000.0,6130000.0
2.5%,1960000.0,3070000.0,24000.0,0.0,2070000.0
97.5%,4670000.0,6100000.0,42200.0,6680000.0,3750000.0


In [21]:
bpp_both.draw_posteriors(mcmc[3],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f197549c450>,
 <toyplot.canvas.Canvas at 0x7f1953393f10>)

In [22]:
c, a, m = bpp_both.draw_posterior_tree(mcmc[3],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,
    node_dists=[10, 9, 8, 7, 6])

ValueError: not enough values to unpack (expected 3, got 2)