In [1]:
import itertools
import pandas as pd
import numpy as np
import toytree
import toyplot
import arviz as az
import pymc3 as pm

In [2]:
# Build the species tree used throughout the notebook: a random tree from
# toytree.rtree.bdtree with NSPECIES tips and a fixed seed, then rescaled with
# node_scale_root_height(1.0). TREE.ntips is used later to size per-species vectors.
NSPECIES = 80
TREE = toytree.rtree.bdtree(ntips=NSPECIES, seed=666)
TREE = TREE.mod.node_scale_root_height(1.0)

In [9]:
# True (generating) parameter values, written as (mean, std) pairs
𝛼_mean, 𝛼_std = 0.05, 0.02
𝛽_mean, 𝛽_std = 3.0, 0.2
𝜓_mean, 𝜓_std = 0.0, 0.33

# 4 different clade effects on rate of RI (used for partial-pooling data),
# one (mean, std) pair per clade
𝜓_0_mean, 𝜓_0_std = 1.0, 0.1
𝜓_1_mean, 𝜓_1_std = 0.5, 0.05
𝜓_2_mean, 𝜓_2_std = -0.5, 0.05
𝜓_3_mean, 𝜓_3_std = -1.0, 0.1

# Group index: clade label (0-3) of each of the 80 species, stored as four
# contiguous runs of 36, 9, 21 and 14 species respectively.
gidx = np.repeat(np.arange(4), [36, 9, 21, 14])

In [11]:
# Species data: one row per species with its clade label and randomly drawn
# per-species parameters.
# NOTE(review): no random seed is set in the visible cells, so every run of
# this cell draws different values. The plots/regressions further down compare
# loaded traces against `psi_x` from this frame - confirm these are the values
# the saved samples were actually simulated from.
clade_psi_params = [
    (𝜓_0_mean, 𝜓_0_std),
    (𝜓_1_mean, 𝜓_1_std),
    (𝜓_2_mean, 𝜓_2_std),
    (𝜓_3_mean, 𝜓_3_std),
]
SPECIES_DATA = pd.DataFrame({
    "gidx": gidx,
    "b": np.random.normal(𝛽_mean, 𝛽_std, TREE.ntips),
    "psi": np.random.normal(𝜓_mean, 𝜓_std, TREE.ntips),
    # clade-specific draws, concatenated in clade order; this lines up with the
    # rows only because gidx is stored as contiguous runs 0, 1, 2, 3
    "psi_x": np.concatenate([
        np.random.normal(clade_mean, clade_std, (gidx == clade).sum())
        for clade, (clade_mean, clade_std) in enumerate(clade_psi_params)
    ]),
})
SPECIES_DATA.head()

Unnamed: 0,gidx,b,psi,psi_x
0,0,2.881579,0.366762,0.957718
1,0,2.638508,-0.577982,1.068136
2,0,3.113867,0.16233,1.003055
3,0,3.059197,0.347652,1.103499
4,0,2.620887,0.036929,0.823357


In [5]:
def partpooled_logistic(x, y, idx0, idx1, gidx, trace):
    """
    Rebuild the partially pooled logistic model and attach a saved trace.

    No sampling is performed. The model is re-declared so that
    ``pm.load_trace`` can read a previously saved trace from disk, and a
    summary table is computed from it.

    Parameters
    ----------
    x : predictor values, one per observation.
    y : observed outcomes passed to the Bernoulli likelihood.
    idx0, idx1 : per-observation indices into the per-species 𝜓 vector.
    gidx : group label for each species; it indexes the 4 group-level
        parameters and its length sets the number of species-level offsets.
    trace : directory handed to ``pm.load_trace``.

    Returns
    -------
    dict with keys 'model' (the pm.Model), 'trace' (the loaded trace) and
    'stats' (the result of ``pm.summary``).
    """
    # One offset per species. gidx carries exactly one label per species, so its
    # length is the required shape (previously hard-coded to 80).
    n_species = len(gidx)

    # define model
    with pm.Model() as model:

        # indexers
        sidx0 = pm.Data("spp_idx0", idx0)
        sidx1 = pm.Data("spp_idx1", idx1)
        gidx = pm.Data("gidx", gidx)

        # group-level hyperpriors (4 groups) and non-centered per-species effects
        𝜓_mean = pm.Normal('𝜓_mean', mu=0., sigma=5., shape=4)
        𝜓_std = pm.HalfNormal('𝜓_std', 5., shape=4)
        𝜓_offset = pm.Normal('𝜓_offset', mu=0, sigma=1., shape=n_species)
        𝜓 = pm.Deterministic('𝜓', 𝜓_mean[gidx] + 𝜓_std[gidx] * 𝜓_offset)
        𝛽 = pm.Normal('𝛽', mu=0., sigma=10., shape=1)
        𝛼 = pm.Normal('𝛼', mu=0., sigma=10., shape=1)

        # linear predictor, mapped to a probability by the inverse logit.
        # The deterministic keeps its stored name "logit" although it holds
        # the probability, so that it matches the saved trace.
        effect = 𝛼 + (𝛽 + 𝜓[sidx0] + 𝜓[sidx1]) * x
        prob = pm.Deterministic("logit", pm.invlogit(effect))

        # data likelihood: Bernoulli outcomes with success probability `prob`
        pm.Bernoulli("y", p=prob, observed=y)

        # read the previously saved posterior from disk (nothing is sampled here)
        loaded_trace = pm.load_trace(trace)

        # summary table of the loaded posterior
        stats = pm.summary(loaded_trace)

    # organize results
    result_dict = {
        'model': model,
        'trace': loaded_trace,
        'stats': stats,
    }
    return result_dict

### Load 40 traces

In [137]:
# Load the sample table and the saved trace for each of the 40 subsampling
# levels. File stems run power025, power050, ..., power975, power1000.
POWER_DIR = "/pinky/henry/power_results"
POWER_TAGS = ["power{:03d}".format(level) for level in range(25, 1001, 25)]

# results keyed by file stem, e.g. POWER_RESULTS["power025"]
POWER_RESULTS = {}
for power_tag in POWER_TAGS:
    SAMPLE = pd.read_csv("{}/samples/{}.csv".format(POWER_DIR, power_tag))
    model_args = [
        SAMPLE.dist,
        SAMPLE.RI_partpooled,
        SAMPLE.sidx0,
        SAMPLE.sidx1,
        gidx,
    ]
    POWER_RESULTS[power_tag] = partpooled_logistic(
        *model_args, "{}/traces/{}".format(POWER_DIR, power_tag)
    )

# Keep the original per-level variables (power025 ... power1000) available,
# because later cells refer to them by name.
globals().update(POWER_RESULTS)

In [177]:
# TRUE vs. ESTIMATED species effects for the power1000 fit, coloured by clade
estimated_psi = power1000['trace']['𝜓'].mean(axis=0)   # mean over axis 0 of the stored 𝜓 values
true_psi = SPECIES_DATA['psi_x']
clade_palette = toyplot.color.Palette()
clade_colors = [clade_palette[clade] for clade in SPECIES_DATA.gidx]

c, a, m = toyplot.scatterplot(
    estimated_psi,
    true_psi,
    width=400,
    height=250,
    xlabel="ESTIMATED species velocity",
    ylabel="TRUE species velocity",
    color=clade_colors,
);

In [29]:
import scipy.stats as stats
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

In [28]:
def lls(trace):
    """
    Regress the true species values on the estimated ones.

    `trace` is a result dict whose ``['trace']['𝜓']`` entry is averaged over
    axis 0 and used as the x variable; ``SPECIES_DATA['psi_x']`` is the y
    variable. Returns the ``scipy.stats.linregress`` result.
    """
    estimated_psi = trace['trace']['𝜓'].mean(axis=0)
    true_psi = SPECIES_DATA['psi_x']
    return stats.linregress(estimated_psi, true_psi)
    
def anova(trace, groups=None):
    """
    One-way ANOVA of the estimated species effects across groups.

    Parameters
    ----------
    trace : result dict; ``trace['trace']['𝜓']`` is averaged over axis 0 to get
        one estimate per species.
    groups : optional array of group labels, one per species. Defaults to the
        notebook-level ``gidx``. Species are grouped by label, so the labels do
        not need to be stored as contiguous runs.

    Returns
    -------
    The ``scipy.stats.f_oneway`` result, with one sample per distinct label.
    """
    if groups is None:
        groups = gidx
    groups = np.asarray(groups)

    # compute the per-species estimate once instead of once per group
    psi_hat = trace['trace']['𝜓'].mean(axis=0)

    samples = [psi_hat[groups == label] for label in np.unique(groups)]
    return f_oneway(*samples)

def tukey(trace):
    """
    Print a Tukey HSD pairwise comparison of the estimated species effects.

    The per-species estimate is ``trace['trace']['𝜓']`` averaged over axis 0;
    species are grouped by the notebook-level ``gidx`` and the comparison uses
    alpha = 0.05. The table is printed; nothing is returned.
    """
    psi_hat = trace['trace']['𝜓'].mean(axis=0)
    hsd_table = pairwise_tukeyhsd(endog=psi_hat, groups=gidx, alpha=0.05)
    print(hsd_table)

In [224]:
# One-way ANOVA of the estimated species effects across the four clades (power025 fit)
anova(power025)

F_onewayResult(statistic=72.77940669697665, pvalue=2.744137847836019e-22)

In [225]:
# Pairwise Tukey HSD between clades for the same power025 fit
tukey(power025)

Multiple Comparison of Means - Tukey HSD, FWER=0.05 
group1 group2 meandiff p-adj   lower   upper  reject
----------------------------------------------------
     0      1   1.8492  0.001  0.9955   2.703   True
     0      2  -2.7478  0.001 -3.3768 -2.1187   True
     0      3  -1.0652 0.0012 -1.7868 -0.3437   True
     1      2   -4.597  0.001 -5.5097 -3.6843   True
     1      3  -2.9145  0.001 -3.8932 -1.9357   True
     2      3   1.6825  0.001  0.8921   2.473   True
----------------------------------------------------


In [223]:
# R squared against percent of crosses: 40 levels from 2.5 to 100 in steps of 2.5.
# NOTE(review): the R squared values are hard-coded; the code that produced
# them is not shown here - presumably lls(...) on each power fit, confirm.
percent_of_crosses = np.arange(2.5, 102.5, 2.5)
r_squared = [
    0.572, 0.601, 0.424, 0.762, 0.626, 0.776, 0.673, 0.969, 0.955, 0.863,
    0.956, 0.942, 0.865, 0.934, 0.974, 0.975, 0.887, 0.863, 0.968, 0.972,
    0.976, 0.918, 0.971, 0.941, 0.974, 0.968, 0.964, 0.972, 0.971, 0.955,
    0.977, 0.956, 0.978, 0.955, 0.965, 0.983, 0.977, 0.980, 0.979, 0.979,
]

c, a, m = toyplot.scatterplot(
    percent_of_crosses,
    r_squared,
    width=400,
    height=250,
    xlabel="Percent of crosses",
    ylabel="R squared value",
);

In [228]:
# Write the most recent canvas `c` to a PNG file.
# NOTE(review): absolute, user-specific output path.
import toyplot.png

power_plot_path = "/home/henry/png/power_analysis.png"
toyplot.png.render(c, power_plot_path)

### Linear model

In [25]:
def partpooled_linear(x, y, idx0, idx1, gidx, **kwargs):
    """
    Rebuild the partially pooled "linear" model and attach its stored trace.

    The predictor ``𝛼 + (𝛽 + 𝜓[idx0] + 𝜓[idx1]) * x`` is passed through the
    inverse logit and used as the success probability of a Bernoulli
    likelihood on ``y``. Nothing is sampled: the posterior is read with
    ``pm.load_trace`` from a fixed path.

    Parameters
    ----------
    x : predictor values, one per observation.
    y : observed outcomes for the Bernoulli likelihood.
    idx0, idx1 : per-observation indices into the per-species 𝜓 vector.
    gidx : group label per species; indexes the 4 group-level parameters.
    **kwargs : accepted but not used.

    Returns
    -------
    dict with keys 'model', 'trace' and 'stats' (``pm.summary`` of the trace).
    """
    with pm.Model() as model:

        # shared-data containers for the index arrays
        first_spp = pm.Data("spp_idx0", idx0)
        second_spp = pm.Data("spp_idx1", idx1)
        group_of = pm.Data("gidx", gidx)

        # group-level hyperpriors (4 groups), non-centered per-species effects
        psi_group_mean = pm.Normal('𝜓_mean', mu=0., sigma=5., shape=4)
        psi_group_std = pm.HalfNormal('𝜓_std', 5., shape=4)
        psi_raw = pm.Normal('𝜓_offset', mu=0, sigma=1., shape=TREE.ntips)
        psi = pm.Deterministic(
            '𝜓', psi_group_mean[group_of] + psi_group_std[group_of] * psi_raw
        )
        beta = pm.Normal('𝛽', mu=0., sigma=10., shape=1)
        alpha = pm.Normal('𝛼', mu=0., sigma=10., shape=1)

        # predictor -> probability (stored under the name "logit")
        slope = beta + psi[first_spp] + psi[second_spp]
        prob = pm.Deterministic("logit", pm.invlogit(alpha + slope * x))

        # Bernoulli likelihood on the observed outcomes
        pm.Bernoulli("y", p=prob, observed=y)

        # read the saved posterior back from disk and summarise it
        loaded = pm.load_trace("/home/henry/oaks-thesis/trace/linear-pmpd")
        summary = pm.summary(loaded)

    return {'model': model, 'trace': loaded, 'stats': summary}

In [26]:
# Restore `linear_args` from IPython's %store database; it is not defined in
# the visible part of this notebook.
%store -r linear_args

In [27]:
# Rebuild the linear model and load its saved trace; args are passed positionally
linear = partpooled_linear(*linear_args)

### Logarithmic model

In [28]:
def partpooled_logarithmic(x, y, idx0, idx1, gidx, **kwargs):
    """
    Rebuild the partially pooled logarithmic model and attach its stored trace.

    The predictor ``𝛼 + (𝛽 + 𝜓[idx0] + 𝜓[idx1]) * log(x)`` is passed through
    the inverse logit and used as the success probability of a Bernoulli
    likelihood on ``y``. Nothing is sampled: the posterior is read with
    ``pm.load_trace`` from a fixed path.

    Parameters
    ----------
    x : predictor values, one per observation (``np.log`` is applied to them).
    y : observed outcomes for the Bernoulli likelihood.
    idx0, idx1 : per-observation indices into the per-species 𝜓 vector.
    gidx : group label per species; indexes the 4 group-level parameters.
    **kwargs : accepted but not used.

    Returns
    -------
    dict with keys 'model', 'trace' and 'stats' (``pm.summary`` of the trace).
    """
    with pm.Model() as model:

        # shared-data containers for the index arrays
        first_spp = pm.Data("spp_idx0", idx0)
        second_spp = pm.Data("spp_idx1", idx1)
        group_of = pm.Data("gidx", gidx)

        # group-level hyperpriors (4 groups), non-centered per-species effects
        psi_group_mean = pm.Normal('𝜓_mean', mu=0., sigma=5., shape=4)
        psi_group_std = pm.HalfNormal('𝜓_std', 5., shape=4)
        psi_raw = pm.Normal('𝜓_offset', mu=0, sigma=1., shape=TREE.ntips)
        psi = pm.Deterministic(
            '𝜓', psi_group_mean[group_of] + psi_group_std[group_of] * psi_raw
        )
        beta = pm.Normal('𝛽', mu=0., sigma=10., shape=1)
        alpha = pm.Normal('𝛼', mu=0., sigma=10., shape=1)

        # predictor on log(x) -> probability (stored under the name "logit")
        slope = beta + psi[first_spp] + psi[second_spp]
        prob = pm.Deterministic("logit", pm.invlogit(alpha + slope * np.log(x)))

        # Bernoulli likelihood on the observed outcomes
        pm.Bernoulli("y", p=prob, observed=y)

        # read the saved posterior back from disk and summarise it
        loaded = pm.load_trace("/home/henry/oaks-thesis/trace/logarithmic-pmpd")
        summary = pm.summary(loaded)

    return {'model': model, 'trace': loaded, 'stats': summary}

In [29]:
# Restore `logarithmic_args` from IPython's %store database; it is not defined
# in the visible part of this notebook.
%store -r logarithmic_args

In [30]:
# Rebuild the logarithmic model and load its saved trace; args are passed positionally
logarithmic = partpooled_logarithmic(*logarithmic_args)

### Exponential model

In [32]:
def partpooled_exponential(x, y, idx0, idx1, gidx, **kwargs):
    """
    Rebuild the partially pooled exponential model and attach its stored trace.

    The predictor ``𝛼 * (𝛽 + 𝜓[idx0] + 𝜓[idx1]) ** x`` is passed through the
    inverse logit and used as the success probability of a Bernoulli
    likelihood on ``y``. Nothing is sampled: the posterior is read with
    ``pm.load_trace`` from a fixed path.

    Parameters
    ----------
    x : predictor values, one per observation (used as the exponent).
    y : observed outcomes for the Bernoulli likelihood.
    idx0, idx1 : per-observation indices into the per-species 𝜓 vector.
    gidx : group label per species; indexes the 4 group-level parameters.
    **kwargs : accepted but not used.

    Returns
    -------
    dict with keys 'model', 'trace' and 'stats' (``pm.summary`` of the trace).
    """
    with pm.Model() as model:

        # shared-data containers for the index arrays
        first_spp = pm.Data("spp_idx0", idx0)
        second_spp = pm.Data("spp_idx1", idx1)
        group_of = pm.Data("gidx", gidx)

        # group-level hyperpriors (4 groups), non-centered per-species effects
        psi_group_mean = pm.Normal('𝜓_mean', mu=0., sigma=5., shape=4)
        psi_group_std = pm.HalfNormal('𝜓_std', 5., shape=4)
        psi_raw = pm.Normal('𝜓_offset', mu=0, sigma=1., shape=TREE.ntips)
        psi = pm.Deterministic(
            '𝜓', psi_group_mean[group_of] + psi_group_std[group_of] * psi_raw
        )
        # unlike the sibling models, the prior on 𝛽 is centred at 0.5
        beta = pm.Normal('𝛽', mu=0.5, sigma=10, shape=1)
        alpha = pm.Normal('𝛼', mu=0., sigma=10., shape=1)

        # exponential predictor -> probability (stored under the name "logit")
        base = beta + psi[first_spp] + psi[second_spp]
        prob = pm.Deterministic("logit", pm.invlogit(alpha * (base ** x)))

        # Bernoulli likelihood on the observed outcomes
        pm.Bernoulli("y", p=prob, observed=y)

        # read the saved posterior back from disk and summarise it
        loaded = pm.load_trace("/home/henry/oaks-thesis/trace/exponential-pmpd")
        summary = pm.summary(loaded)

    return {'model': model, 'trace': loaded, 'stats': summary}

In [33]:
# Restore `exponential_args` from IPython's %store database; it is not defined
# in the visible part of this notebook.
%store -r exponential_args

In [34]:
# Rebuild the exponential model and load its saved trace; args are passed positionally
exponential = partpooled_exponential(*exponential_args)

### Asymptotic model

In [35]:
def partpooled_asymptotic(x, y, idx0, idx1, gidx, **kwargs):
    """
    Rebuild the partially pooled asymptotic model and attach its stored trace.

    The predictor ``1 - (1 - 𝛼) * exp(-(𝛽 + 𝜓[idx0] + 𝜓[idx1]) * x)`` is passed
    through the inverse logit and used as the success probability of a
    Bernoulli likelihood on ``y``. Nothing is sampled: the posterior is read
    with ``pm.load_trace`` from a fixed path.

    Parameters
    ----------
    x : predictor values, one per observation.
    y : observed outcomes for the Bernoulli likelihood.
    idx0, idx1 : per-observation indices into the per-species 𝜓 vector.
    gidx : group label per species; indexes the 4 group-level parameters.
    **kwargs : accepted but not used.

    Returns
    -------
    dict with keys 'model', 'trace' and 'stats' (``pm.summary`` of the trace).
    """
    with pm.Model() as model:

        # shared-data containers for the index arrays
        first_spp = pm.Data("spp_idx0", idx0)
        second_spp = pm.Data("spp_idx1", idx1)
        group_of = pm.Data("gidx", gidx)

        # group-level hyperpriors (4 groups), non-centered per-species effects
        psi_group_mean = pm.Normal('𝜓_mean', mu=0., sigma=5., shape=4)
        psi_group_std = pm.HalfNormal('𝜓_std', 5., shape=4)
        psi_raw = pm.Normal('𝜓_offset', mu=0, sigma=1., shape=TREE.ntips)
        psi = pm.Deterministic(
            '𝜓', psi_group_mean[group_of] + psi_group_std[group_of] * psi_raw
        )
        beta = pm.Normal('𝛽', mu=0., sigma=10., shape=1)
        alpha = pm.Normal('𝛼', mu=0., sigma=10., shape=1)

        # asymptotic predictor -> probability (stored under the name "logit")
        rate = beta + psi[first_spp] + psi[second_spp]
        approach = 1 - (1 - alpha) * np.exp(-rate * x)
        prob = pm.Deterministic("logit", pm.invlogit(approach))

        # Bernoulli likelihood on the observed outcomes
        pm.Bernoulli("y", p=prob, observed=y)

        # read the saved posterior back from disk and summarise it
        loaded = pm.load_trace("/home/henry/oaks-thesis/trace/asymptotic-pmpd")
        summary = pm.summary(loaded)

    return {'model': model, 'trace': loaded, 'stats': summary}

In [36]:
# Restore `asymptotic_args` from IPython's %store database; it is not defined
# in the visible part of this notebook.
%store -r asymptotic_args

In [37]:
# Rebuild the asymptotic model and load its saved trace; args are passed positionally
asymptotic = partpooled_asymptotic(*asymptotic_args)

### Quadratic model

In [38]:
def partpooled_quadratic(x, y, idx0, idx1, gidx, **kwargs):
    """
    Rebuild the partially pooled quadratic model and attach its stored trace.

    The predictor ``(𝛽 + 𝜓[idx0] + 𝜓[idx1]) * x ** 2 + 𝛼 * x`` is passed
    through the inverse logit and used as the success probability of a
    Bernoulli likelihood on ``y``. Nothing is sampled: the posterior is read
    with ``pm.load_trace`` from a fixed path.

    Parameters
    ----------
    x : predictor values, one per observation.
    y : observed outcomes for the Bernoulli likelihood.
    idx0, idx1 : per-observation indices into the per-species 𝜓 vector.
    gidx : group label per species; indexes the 4 group-level parameters.
    **kwargs : accepted but not used.

    Returns
    -------
    dict with keys 'model', 'trace' and 'stats' (``pm.summary`` of the trace).
    """
    with pm.Model() as model:

        # shared-data containers for the index arrays
        first_spp = pm.Data("spp_idx0", idx0)
        second_spp = pm.Data("spp_idx1", idx1)
        group_of = pm.Data("gidx", gidx)

        # group-level hyperpriors (4 groups), non-centered per-species effects
        psi_group_mean = pm.Normal('𝜓_mean', mu=0., sigma=5., shape=4)
        psi_group_std = pm.HalfNormal('𝜓_std', 5., shape=4)
        psi_raw = pm.Normal('𝜓_offset', mu=0, sigma=1., shape=TREE.ntips)
        psi = pm.Deterministic(
            '𝜓', psi_group_mean[group_of] + psi_group_std[group_of] * psi_raw
        )
        beta = pm.Normal('𝛽', mu=0., sigma=10., shape=1)
        alpha = pm.Normal('𝛼', mu=0., sigma=10., shape=1)

        # quadratic predictor -> probability (stored under the name "logit")
        curvature = beta + psi[first_spp] + psi[second_spp]
        parabola = (curvature * x ** 2) + alpha * x
        prob = pm.Deterministic("logit", pm.invlogit(parabola))

        # Bernoulli likelihood on the observed outcomes
        pm.Bernoulli("y", p=prob, observed=y)

        # read the saved posterior back from disk and summarise it
        loaded = pm.load_trace("/home/henry/oaks-thesis/trace/quadratic-pmpd")
        summary = pm.summary(loaded)

    return {'model': model, 'trace': loaded, 'stats': summary}

In [39]:
# Restore `quadratic_args` from IPython's %store database; it is not defined
# in the visible part of this notebook.
%store -r quadratic_args

In [40]:
# Rebuild the quadratic model and load its saved trace; args are passed positionally
quadratic = partpooled_quadratic(*quadratic_args)

### Model comparison

In [41]:
# Convert each loaded fit (trace + model) to an ArviZ object, in the same order
# as before: linear, logarithmic, exponential, asymptotic, quadratic.
az_linear, az_logarithmic, az_exponential, az_asymptotic, az_quadratic = [
    az.from_pymc3(trace=fit['trace'], model=fit['model'])
    for fit in (linear, logarithmic, exponential, asymptotic, quadratic)
]

In [44]:
# Compare all five fitted models with az.compare
all_fits = [
    ("linear", az_linear),
    ("logarithmic", az_logarithmic),
    ("exponential", az_exponential),
    ("asymptotic", az_asymptotic),
    ("quadratic", az_quadratic),
]
az.compare({label: fit for label, fit in all_fits})

The scale is now log by default. Use 'scale' argument or 'stats.ic_scale' rcParam if you rely on a specific value.
A higher log-score (or a lower deviance) indicates a model with better predictive accuracy.
  "\nThe scale is now log by default. Use 'scale' argument or "
  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "
  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "


Unnamed: 0,rank,loo,p_loo,d_loo,weight,se,dse,warning,loo_scale
asymptotic,0,-214.364,20.7363,0.0,1.0,25.391,0.0,True,log
linear,1,-521.206,22.5578,306.841,3.37411e-91,20.6062,33.2605,False,log
logarithmic,2,-633.661,34.4304,419.296,3.21467e-133,27.2756,33.1844,True,log
quadratic,3,-708.151,19.3844,493.786,9.893219999999999e-169,28.7793,31.9976,False,log
exponential,4,-728.756,17.3979,514.391,9.15282e-175,26.3133,35.6739,False,log


In [43]:
# Same comparison restricted to three models; the exponential and asymptotic
# fits are deliberately left out here.
subset_fits = [
    ("linear", az_linear),
    ("logarithmic", az_logarithmic),
    ("quadratic", az_quadratic),
]
az.compare({label: fit for label, fit in subset_fits})

The scale is now log by default. Use 'scale' argument or 'stats.ic_scale' rcParam if you rely on a specific value.
A higher log-score (or a lower deviance) indicates a model with better predictive accuracy.
  "\nThe scale is now log by default. Use 'scale' argument or "
  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "


Unnamed: 0,rank,loo,p_loo,d_loo,weight,se,dse,warning,loo_scale
linear,0,-521.206,22.5578,0.0,1.0,26.9655,0.0,False,log
logarithmic,1,-633.661,34.4304,112.455,2.7798399999999996e-20,26.9023,24.8671,True,log
quadratic,2,-708.151,19.3844,186.945,2.92849e-59,25.2237,18.2152,False,log
