In [1]:
import ipyrad.analysis as ipa
import ipyparallel as ipp
import numpy as np
import pandas as pd
import toyplot
import toytree

In [2]:
ipa.__version__

'0.9.62'

In [2]:
ipyclient = ipp.Client()
ipyclient.ids

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79]

In [3]:
# Set loci location.
loci = "/home/henry/phylo-timescale/alignments/model_real_small_diploid_loci.seqs.hdf5"

# Map samples to species names.
imap = {
    "r0" : ["r0"],
    "r1" : ["r1"],
    "r2" : ["r2"],
    "r3" : ["r3"],
    "r4" : ["r4"],
    "r5" : ["r5"],
    "r6" : ["r6"],
    "r7" : ["r7"],
    "r8" : ["r8"],
    "r9" : ["r9"],
    "r10" : ["r10"],
    "r11" : ["r11"],
    "r12" : ["r12"],
    "r13" : ["r13"],
    "r14" : ["r14"],
    "r15" : ["r15"],
    "r16" : ["r16"],
    "r17" : ["r17"],
    "r18" : ["r18"],
    "r19" : ["r19"],
    "r20" : ["r20"],
    "r21" : ["r21"],
    "r22" : ["r22"],
    "r23" : ["r23"],
    "r24" : ["r24"],
    "r25" : ["r25"],
    "r26" : ["r26"],
    "r27" : ["r27"],
    "r28" : ["r28"],
    "r29" : ["r29"],
    "r30" : ["r30"],
    "r31" : ["r31"],
    "r32" : ["r32"],
    "r33" : ["r33"],
    "r34" : ["r34"],
    "r35" : ["r35"],
    "r36" : ["r36"],
    "r37" : ["r37"],
    "r38" : ["r38"],
    "r39" : ["r39"],
    "r40" : ["r40"],
    "r41" : ["r41"],
    "r42" : ["r42"],
    "r43" : ["r43"],
    "r44" : ["r44"],
    "r45" : ["r45"],
    "r46" : ["r46"],
    "r47" : ["r47"],
    "r48" : ["r48"],
    "r49" : ["r49"],
}

In [4]:
# True species tree.
tree = "/home/henry/phylo-timescale/newick/realtree.tre"
truetree = toytree.tree(tree)
truetree.draw(layout='d', use_edge_lengths=True, node_labels="idx", node_sizes=12);

In [87]:
# Set up bpp object.
bpp_real = ipa.bpp(
    name="bpp_real_small",
    data=loci,
    guidetree=truetree.drop_tips([i for i in truetree.get_tip_labels() if int(i[1:]) >5]), 
    imap={"r{}".format(i) : ["r{}".format(i)] for i in range(6)},
    maxloci=500,
    workdir="/home/henry/phylo-timescale/bpp",
    reps_resample_loci=False,
    )

In [6]:
# Set parameters.
bpp_real.kwargs['thetaprior'] = (3.5, 0.015)
bpp_real.kwargs['tauprior'] = (10, 0.009)
bpp_real.kwargs['sampfreq'] = 3
bpp_real.kwargs['burnin'] = 100000
bpp_real.kwargs['nsample'] = 1000000

In [103]:
# Draw priors.
bpp_real.draw_priors(
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,
);

In [None]:
bpp_real.ipcluster['cores'] = 8
bpp_real.run(nreps=4, auto=True, force=True)

Parallel connection | pinky: 8 cores
[locus filter] full data: 2000
[locus filter] post filter: 2000
[ipa bpp] bpp v4.1.4
[ipa.bpp] distributed 4 bpp jobs (name=bpp_real_small, nloci=500)
[#                   ]   8% 5:25:47 | progress on rep 0 

In [3]:
# Short code to reload a bpp object.
bpp_real = ipa.bpp(    
    name="bpp_real_small",
    workdir="/home/henry/phylo-timescale/bpp"
)

In [4]:
res, mcmc = bpp_real.summarize_results("00", individual_results = False)

[ipa.bpp] found 4 existing result files
[ipa.bpp] summarizing algorithm '00' results
[ipa.bpp] combining mcmc files


In [92]:
dfdiv, dfne, ttre, mtre = bpp_real.transform(mcmc,
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,)

In [93]:
dfdiv

Unnamed: 0,6,7,8,9,10
mean,3020000.0,4710000.0,6170000.0,7660000.0,8930000.0
median,2960000.0,4680000.0,6050000.0,7420000.0,8580000.0
std,662000.0,618000.0,1380000.0,2250000.0,2990000.0
min,870000.0,2280000.0,1620000.0,1090000.0,862000.0
max,7800000.0,9480000.0,16200000.0,28400000.0,37200000.0
2.5%,1890000.0,3600000.0,3800000.0,3960000.0,4140000.0
97.5%,4470000.0,6020000.0,9210000.0,12700000.0,15700000.0


In [94]:
dfne

Unnamed: 0,6,7,8,9,10
mean,859000.0,643000.0,640000.0,26500000.0,1830000.0
median,767000.0,596000.0,593000.0,3.61e-33,1270000.0
std,492000.0,301000.0,301000.0,316000000.0,1850000.0
min,4560.0,10800.0,10200.0,0.0,0.227
max,5780000.0,3300000.0,3310000.0,35800000000.0,36700000.0
2.5%,180000.0,195000.0,192000.0,1.1100000000000001e-213,44600.0
97.5%,2060000.0,1350000.0,1350000.0,62400000.0,6800000.0


In [117]:
taus = sorted([i for i in mcmc.columns if "tau_" in i], key=lambda x: len(x))[-1]
taus = mcmc.loc[:, taus]
mags, edges = np.histogram(taus, bins=100, density=True)

In [7]:
bpp_real.draw_posteriors(mcmc,    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,)

(<toyplot.canvas.Canvas at 0x7f3f1c6a2550>,
 <toyplot.canvas.Canvas at 0x7f41d47010d0>)

In [97]:
c, a = bpp_real.draw_posterior_tree(mcmc,    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,
    node_dists=[10, 9, 8, 7, 6])

Old stuff
=====

In [40]:
dfdiv0, dfne0, ttre0, mtre0 = bpp_real.transform(mcmc[0],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [45]:
dfdiv0

Unnamed: 0,6,7,8,9,10
mean,4040000.0,6770000.0,9080000.0,11600000.0,14500000.0
median,3950000.0,6540000.0,8720000.0,11100000.0,14000000.0
std,901000.0,1940000.0,2850000.0,3850000.0,3860000.0
min,1330000.0,1590000.0,1530000.0,1960000.0,3530000.0
max,11100000.0,23000000.0,34900000.0,43900000.0,45800000.0
2.5%,2560000.0,3670000.0,4600000.0,5600000.0,8250000.0
97.5%,6090000.0,11200000.0,15700000.0,20500000.0,23300000.0


In [44]:
dfne0

Unnamed: 0,6,7,8,9,10
mean,1210000.0,1040000.0,1140000.0,306000000.0,1640000.0
median,1150000.0,992000.0,1090000.0,4750000.0,1150000.0
std,455000.0,350000.0,380000.0,896000000.0,1640000.0
min,126000.0,124000.0,168000.0,5.809999999999999e-46,0.613
max,5150000.0,4600000.0,4510000.0,22600000000.0,24000000.0
2.5%,509000.0,486000.0,539000.0,6.16e-05,46700.0
97.5%,2270000.0,1840000.0,2020000.0,2810000000.0,6050000.0


In [68]:
bpp_real.draw_posteriors(mcmc[0],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,)

  return n/db/n.sum(), bin_edges


(<toyplot.canvas.Canvas at 0x7f530b86fad0>,
 <toyplot.canvas.Canvas at 0x7f536075df50>)

In [79]:
c, a = bpp_real.draw_posterior_tree(mcmc[0],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=9e-9,
    mutrate_max=2e-8,
    node_dists=[10, 9, 8, 7, 6])
# True mutation rate is 1e-8.

In [56]:
dfdiv1, dfne1, ttre1, mtre1 = bpp_real.transform(mcmc[1],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [58]:
dfdiv1

Unnamed: 0,6,7,8,9,10
mean,5520000.0,5520000.0,5520000.0,5520000.0,5520000.0
median,5430000.0,5430000.0,5430000.0,5430000.0,5430000.0
std,978000.0,978000.0,978000.0,978000.0,978000.0
min,2350000.0,2350000.0,2350000.0,2350000.0,2350000.0
max,12500000.0,12500000.0,12500000.0,12500000.0,12500000.0
2.5%,3880000.0,3880000.0,3880000.0,3880000.0,3880000.0
97.5%,7700000.0,7700000.0,7700000.0,7700000.0,7700000.0


In [59]:
dfne1

Unnamed: 0,6,7,8,9,10
mean,28500.0,207000.0,213000.0,219000.0,7360000.0
median,28100.0,162000.0,167000.0,173000.0,7260000.0
std,4190.0,170000.0,175000.0,180000.0,1090000.0
min,15100.0,48.5,53.2,2.01,3800000.0
max,60600.0,2300000.0,2760000.0,2560000.0,15800000.0
2.5%,21500.0,15700.0,16100.0,16800.0,5530000.0
97.5%,37800.0,649000.0,669000.0,687000.0,9780000.0


In [71]:
bpp_real.draw_posteriors(mcmc[1],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,)

(<toyplot.canvas.Canvas at 0x7f52d8e1f810>,
 <toyplot.canvas.Canvas at 0x7f52d8e6c8d0>)

In [74]:
c, a = bpp_real.draw_posterior_tree(mcmc[1],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=1e-8,
    mutrate_max=1.001e-8,
    node_dists=[10, 9, 8, 7, 6])

In [60]:
dfdiv2, dfne2, ttre2, mtre2 = bpp_real.transform(mcmc[2],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [61]:
dfdiv2

Unnamed: 0,6,7,8,9,10
mean,4190000.0,7030000.0,9980000.0,12600000.0,15400000.0
median,4120000.0,6910000.0,9810000.0,12400000.0,15100000.0
std,749000.0,1240000.0,1770000.0,2230000.0,2720000.0
min,1760000.0,2990000.0,4230000.0,5350000.0,6510000.0
max,9540000.0,15900000.0,22500000.0,28400000.0,34600000.0
2.5%,2940000.0,4930000.0,7010000.0,8850000.0,10800000.0
97.5%,5860000.0,9800000.0,13900000.0,17600000.0,21400000.0


In [62]:
dfne2

Unnamed: 0,6,7,8,9,10
mean,1330000.0,1300000.0,1140000.0,1480000.0,1080000.0
median,1310000.0,1280000.0,1120000.0,1450000.0,1070000.0
std,224000.0,209000.0,185000.0,258000.0,174000.0
min,620000.0,651000.0,535000.0,658000.0,486000.0
max,3020000.0,2810000.0,2450000.0,3540000.0,2450000.0
2.5%,952000.0,948000.0,828000.0,1040000.0,791000.0
97.5%,1830000.0,1760000.0,1550000.0,2050000.0,1470000.0


In [25]:
bpp_real.draw_posteriors(mcmc[2],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,)

(<toyplot.canvas.Canvas at 0x7f532df0de50>,
 <toyplot.canvas.Canvas at 0x7f532deeca10>)

In [38]:
c, a, m = bpp_real.draw_posterior_tree(mcmc[2],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,
    node_dists=[10, 9, 8, 7, 6])

ValueError: not enough values to unpack (expected 3, got 2)

In [63]:
dfdiv3, dfne3, ttre3, mtre3 = bpp_real.transform(mcmc[3],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9)

In [64]:
dfdiv3

Unnamed: 0,6,7,8,9,10
mean,3940000.0,7260000.0,9940000.0,12800000.0,15600000.0
median,3880000.0,7140000.0,9770000.0,12600000.0,15400000.0
std,705000.0,1290000.0,1760000.0,2270000.0,2770000.0
min,1660000.0,3090000.0,4210000.0,5430000.0,6600000.0
max,8980000.0,16400000.0,22400000.0,28900000.0,35200000.0
2.5%,2760000.0,5100000.0,6980000.0,9010000.0,11000000.0
97.5%,5520000.0,10100000.0,13900000.0,17900000.0,21800000.0


In [65]:
dfne3

Unnamed: 0,6,7,8,9,10
mean,1510000.0,1170000.0,1210000.0,1390000.0,995000.0
median,1480000.0,1160000.0,1190000.0,1360000.0,981000.0
std,249000.0,190000.0,195000.0,230000.0,157000.0
min,666000.0,548000.0,569000.0,662000.0,480000.0
max,3370000.0,2530000.0,2660000.0,3070000.0,2260000.0
2.5%,1090000.0,854000.0,879000.0,999000.0,730000.0
97.5%,2060000.0,1600000.0,1640000.0,1900000.0,1340000.0


In [26]:
bpp_real.draw_posteriors(mcmc[3],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,)

(<toyplot.canvas.Canvas at 0x7f532de3ee50>,
 <toyplot.canvas.Canvas at 0x7f532de64110>)

In [39]:
c, a, m = bpp_real.draw_posterior_tree(mcmc[3],    
    gentime_min=0.8, 
    gentime_max=1.2,
    mutrate_min=5e-9,
    mutrate_max=9e-9,
    node_dists=[10, 9, 8, 7, 6])

ValueError: not enough values to unpack (expected 3, got 2)