In [1]:
import itertools
import pandas as pd
import numpy as np
import toytree
import toyplot
import arviz as az
import pymc3 as pm

### Tree and dataframe setup

In [2]:
# generate a random tree
NSPECIES = 80
TREE = toytree.rtree.bdtree(
    ntips=NSPECIES,
    seed=666,
).mod.node_scale_root_height(1.0)

# node idxs that delimit several distinct clades on this tree
CLADES = [152, 153, 154, 155]

# draw and color the four major clades
TREE.draw(
    layout='d', 
    width=500,
    tip_labels=False,
    edge_colors=TREE.get_edge_values_mapped({
        j: toytree.colors[i] for i, j in enumerate(CLADES)
    }),
    scalebar=True,
);

In [3]:
# make group index (gidx)
crown_dict = {i: TREE.get_tip_labels(i) for i in CLADES}
gidx = np.zeros(TREE.ntips, dtype=int)
for tidx, tip in enumerate(TREE.get_tip_labels()):
    for cidx, clade in enumerate(crown_dict):
        if tip in crown_dict[clade]:
            gidx[tidx] = cidx
gidx

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [4]:
# True param values
ùõº_mean = 0.05
ùõº_std = 0.02
ùõΩ_mean = 3.0
ùõΩ_std = 0.2
ùúì_mean = 0.0
ùúì_std = 0.33

# 4 different clade effects on rate of RI (used for partial-pooling data)
ùúì_0_mean = 1.0
ùúì_0_std = 0.1
ùúì_1_mean = 0.5
ùúì_1_std = 0.05
ùúì_2_mean = -0.5
ùúì_2_std = 0.05
ùúì_3_mean = -1.0
ùúì_3_std = 0.1

In [51]:
# species dataframe
SPECIES_DATA = pd.DataFrame({
    "gidx": gidx,
    "b": np.random.normal(ùõΩ_mean, ùõΩ_std, TREE.ntips),
    "psi": np.random.normal(ùúì_mean, ùúì_std, TREE.ntips),
    "psi_x": np.concatenate([
        np.random.normal(ùúì_0_mean, ùúì_0_std, len(gidx[gidx == 0])),
        np.random.normal(ùúì_1_mean, ùúì_1_std, len(gidx[gidx == 1])),
        np.random.normal(ùúì_2_mean, ùúì_2_std, len(gidx[gidx == 2])),
        np.random.normal(ùúì_3_mean, ùúì_3_std, len(gidx[gidx == 3])),
    ]),
})
SPECIES_DATA.head()

Unnamed: 0,gidx,b,psi,psi_x
0,0,2.998961,0.365273,1.079141
1,0,2.936681,-0.547848,0.957262
2,0,2.802832,-0.150277,1.040836
3,0,2.960643,0.360127,0.844379
4,0,3.186135,0.206865,0.987505


### Generate crossing data

In [53]:
def get_dist(tree, idx0, idx1):
    "returns the genetic distance between two nodes on a tree"
    dist = tree.treenode.get_distance(
        tree.idx_dict[idx0], 
        tree.idx_dict[idx1],
    )
    return dist

# get all combinations of two sampled taxa
a, b = zip(*itertools.combinations(range(NSPECIES), 2))

# organize into DF and get genetic distance between pairs
DATA = pd.DataFrame({
    "sidx0": a,
    "sidx1": b,
    "dist": [(get_dist(TREE, i, j) / 2) for (i, j) in zip(a, b)],
})

DATA['b'] = np.random.normal(ùõΩ_mean, ùõΩ_std, DATA.shape[0])
DATA['velo'] = (
    DATA['b']
    + SPECIES_DATA['psi'][DATA.sidx0].values
    + SPECIES_DATA['psi'][DATA.sidx1].values
)
DATA['velo_x'] = (
    DATA['b']
    + SPECIES_DATA['psi_x'][DATA.sidx0].values
    + SPECIES_DATA['psi_x'][DATA.sidx1].values
)
DATA['intercept'] = np.random.normal(ùõº_mean, ùõº_std, DATA.shape[0])
# DATA['error'] = np.random.normal(0.0, ùúé_std, DATA.shape[0])

# get logits
DATA['logit_b'] = (
    1 / (1 + np.exp(-(DATA.intercept + DATA.b * np.log(DATA.dist))))
)
DATA['logit'] = (
    1 / (1 + np.exp(-(DATA.intercept + DATA.velo * np.log(DATA.dist))))
)
DATA['logit_x'] = (
    1 / (1 + np.exp(-(DATA.intercept + DATA.velo_x * np.log(DATA.dist))))
)

# get RI estimates
DATA['RI_pooled'] = np.random.binomial(n=1, p=DATA.logit_b / DATA.logit_b.max())
DATA['RI_unpooled'] = np.random.binomial(n=1, p=DATA.logit / DATA.logit.max())
DATA['RI_partpooled'] = np.random.binomial(n=1, p=DATA.logit_x / DATA.logit_x.max())

DATA.head()

Unnamed: 0,sidx0,sidx1,dist,b,velo,velo_x,intercept,logit_b,logit,logit_x,RI_pooled,RI_unpooled,RI_partpooled
0,0,1,0.073376,3.290771,3.108196,5.327174,0.053578,0.000195,0.000314,9.547395e-07,0,0,0
1,0,2,0.089748,3.140516,3.355512,5.260493,0.037017,0.000534,0.000318,3.224566e-06,0,0,0
2,0,3,0.12977,2.91617,3.64157,4.83969,0.044254,0.002703,0.000616,5.336296e-05,0,0,0
3,0,4,0.144542,2.815045,3.387183,4.881691,0.041696,0.004482,0.001487,8.268546e-05,0,0,0
4,0,5,0.144542,2.852265,3.599622,4.928176,0.061829,0.004257,0.001006,7.711301e-05,0,0,0


In [54]:
NSAMPLES = 1000
SAMPLE = DATA.sample(NSAMPLES).copy().reset_index(drop=True)
SAMPLE.head()

Unnamed: 0,sidx0,sidx1,dist,b,velo,velo_x,intercept,logit_b,logit,logit_x,RI_pooled,RI_unpooled,RI_partpooled
0,8,62,1.0,3.522723,3.955839,3.940824,0.019467,0.504867,0.504867,0.504867,1,1,1
1,36,47,1.0,3.070545,2.748627,3.098635,0.059017,0.51475,0.51475,0.51475,1,1,0
2,35,43,0.742448,2.93487,3.327025,4.537597,0.033362,0.301398,0.277392,0.21116,1,1,1
3,9,54,1.0,3.092278,1.829145,3.817094,0.050671,0.512665,0.512665,0.512665,1,1,1
4,15,55,1.0,3.035191,3.646696,3.494298,0.087729,0.521918,0.521918,0.521918,0,1,1


### Visualize data

In [18]:
def logit_plot(dist, logit, RI):
    canvas = toyplot.Canvas(width=500, height=250)
    ax0 = canvas.cartesian(
        label="pooled data (function)",
        xlabel="Genetic dist.",
        ylabel="Logit function",
        grid=(1, 2, 0),
    )
    ax1 = canvas.cartesian(
        label="pooled data (observation)",
        xlabel="Genetic dist.",
        ylabel="RI",
        grid=(1, 2, 1),
    )

    # points are jittered on x-axis for visibility
    ax0.scatterplot(
        dist,
        logit,
        size=5,
        opacity=0.33,
        color=toyplot.color.Palette()[0],
    );
    ax1.scatterplot(
        dist,
        RI,
        size=10,
        opacity=0.2,
        marker="|",
        mstyle={
            "stroke": toyplot.color.Palette()[1],
            "stroke-width": 3,
        },
    );
    return canvas, (ax0, ax1)

In [55]:
logit_plot(SAMPLE.dist, SAMPLE.logit_b, SAMPLE.RI_pooled);

In [56]:
logit_plot(SAMPLE.dist, SAMPLE.logit, SAMPLE.RI_unpooled);

In [57]:
logit_plot(SAMPLE.dist, SAMPLE.logit_x, SAMPLE.RI_partpooled);

### Define models

In [58]:
def pooled_logistic(x, y, **kwargs):
    
    # define model
    with pm.Model() as model:  

        # parameters and error
        ùõº = pm.Normal('ùõº', mu=0., sigma=10., shape=1)
        ùõΩ = pm.Normal('ùõΩ', mu=0., sigma=10., shape=1)
        
        # link function
        effect = ùõº + ùõΩ * np.log(x)
        logit = pm.Deterministic("logit", pm.invlogit(effect))
        
        # data likelihood
        y = pm.Bernoulli("y", p=logit, observed=y)
        
        # sample posterior, skip burnin
        trace = pm.sample(**kwargs)[1000:]
    
        # show summary table
        stats = pm.summary(trace)
        
    # organize results
    result_dict = {
        'model': model, 
        'trace': trace,
        'stats': stats,
    }
    return result_dict

In [59]:
def unpooled_logistic(x, y, idx0, idx1, **kwargs):
    
    # define model
    with pm.Model() as model:
        
        # indexers
        sidx0 = pm.Data("spp_idx0", idx0.values)
        sidx1 = pm.Data("spp_idx1", idx1.values)

        # parameters and error
        ùúì_mean = pm.Normal('ùúì_mean', mu=0., sigma=5., shape=1)
        ùúì_std = pm.HalfNormal('ùúì_std', 5., shape=1)
        ùúì_offset = pm.Normal('ùúì_offset', mu=0, sigma=1., shape=TREE.ntips)
        ùúì = pm.Deterministic('ùúì', ùúì_mean + ùúì_std * ùúì_offset)
        ùõº = pm.Normal('ùõº', mu=0., sigma=10., shape=1)
        ùõΩ = pm.Normal('ùõΩ', mu=0., sigma=10., shape=1)
        
        # link function
        effect = ùõº + (ùõΩ + ùúì[sidx0] + ùúì[sidx1]) * np.log(x)
        logit = pm.Deterministic("logit", pm.invlogit(effect))
        
        # data likelihood
        y = pm.Bernoulli("y", p=logit, observed=y)
        
        # sample posterior, skip burnin
        trace = pm.sample(init = 'adapt_diag', **kwargs)[1000:]
    
        # show summary table
        stats = pm.summary(trace)
        
    # organize results
    result_dict = {
        'model': model, 
        'trace': trace,
        'stats': stats,
    }
    return result_dict

In [60]:
def partpooled_logistic(x, y, idx0, idx1, gidx, **kwargs):
    
    # define model
    with pm.Model() as model:
        
        # indexers
        sidx0 = pm.Data("spp_idx0", idx0)
        sidx1 = pm.Data("spp_idx1", idx1)
        gidx = pm.Data("gidx", gidx)

        # parameters and error
        ùúì_mean = pm.Normal('ùúì_mean', mu=0., sigma=5., shape=4)
        ùúì_std = pm.HalfNormal('ùúì_std', 5., shape=4)
        ùúì_offset = pm.Normal('ùúì_offset', mu=0, sigma=1., shape=TREE.ntips)
        ùúì = pm.Deterministic('ùúì', ùúì_mean[gidx] + ùúì_std[gidx] * ùúì_offset)
        ùõΩ = pm.Normal('ùõΩ', mu=0., sigma=10., shape=1)
        ùõº = pm.Normal('ùõº', mu=0., sigma=10., shape=1)
        
        # linear model prediction
        effect = ùõº + (ùõΩ + ùúì[sidx0] + ùúì[sidx1]) * np.log(x)
        logit = pm.Deterministic("logit", pm.invlogit(effect))
        
        # data likelihood (normal distributed errors)
        y = pm.Bernoulli("y", p=logit, observed=y)

        # sample posterior, skip burnin
        trace = pm.sample(init = 'adapt_diag', **kwargs)[1000:]

        # show summary table
        stats = pm.summary(trace)
        
    # organize results
    result_dict = {
        'model': model, 
        'trace': trace,
        'stats': stats,
    }
    return result_dict

### Functions to plot results

In [25]:
def toytrace(trace, var_names, titles):
    """
    Plot posterior trace with toyplot
    """
    nvars = len(var_names)
    
    # setup canvase
    canvas = toyplot.Canvas(width=500, height=200 * nvars)
    
    # store axes
    axes = []
    
    # iter over params
    for pidx, param in enumerate(var_names):
        
        # get param posterior
        posterior = trace.get_values(param)
        
        # setup axes 
        ax = canvas.cartesian(grid=(nvars, 1, pidx))
        ax.y.show = False
        ax.x.spine.style = {"stroke-width": 1.5}
        ax.x.ticks.labels.style = {"font-size": "12px"}
        ax.x.ticks.show = True
        ax.x.label.text = f"param='{titles[pidx]}'"        
        
        # iterate over shape of param
        for idx in range(posterior.shape[1]):
            mags, bins = np.histogram(posterior[:, idx], bins=100)
            ax.plot(bins[1:], mags, stroke_width=2, opacity=0.6)
        axes.append(ax)
    return canvas, axes

In [26]:
import scipy.stats as stats

def draw_velocity_dists(trace, baseline=0.15):
    """
    Draw the clade velocities as gaussians
    """
    canvas = toyplot.Canvas(width=350, height=300)
    axes = canvas.cartesian(xlabel="Relative velocity of reproductive isolation")
    marks = []
    base = 0
    for i in range(trace['ùúì_mean'].shape[1]):
        
        loc = trace['ùúì_mean'][:, i].mean()
        scale = trace['ùúì_std'][:, i].mean()
        interval = stats.norm.interval(0.995, loc, scale)
        points = np.linspace(interval[0], interval[1], 100)
        mark = axes.fill(
            points, 
            stats.norm.pdf(points, loc=loc, scale=scale), 
            style={
                "fill-opacity": 0.45,
                "stroke": 'black',
                "stroke-opacity": 1.0,
                "stroke-width": 1,
            },
            baseline=np.repeat(base, 100),
        )
        marks.append(mark)
        axes.hlines(base, style={"stroke-dasharray": "5,5", 'stroke-width': 1})
        base += baseline
        
    axes.y.show = False
    axes.x.ticks.locator = toyplot.locator.Extended(only_inside=True)
    axes.x.ticks.show = True
    return canvas, axes, marks

In [61]:
# MCMC sampler kwargs
sample_kwargs = dict(
    tune=10000,
    draws=10000,
    target_accept=0.99,
    return_inferencedata=False,
    progressbar=True,
)

### Run three datasets under pooled model

In [62]:
# model input
model_args = [
    SAMPLE.dist,
    SAMPLE.RI_pooled,
    SAMPLE.sidx0,
    SAMPLE.sidx1,
    gidx
]

# pooled model
pooled_model_pooled_data = pooled_logistic(*model_args[:2], **sample_kwargs)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [ùõΩ, ùõº]


Sampling 4 chains for 10_000 tune and 10_000 draw iterations (40_000 + 40_000 draws total) took 194 seconds.


In [63]:
# model input
model_args = [
    SAMPLE.dist,
    SAMPLE.RI_unpooled,
    SAMPLE.sidx0,
    SAMPLE.sidx1,
    gidx
]

# pooled model
pooled_model_unpooled_data = pooled_logistic(*model_args[:2], **sample_kwargs)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [ùõΩ, ùõº]


Sampling 4 chains for 10_000 tune and 10_000 draw iterations (40_000 + 40_000 draws total) took 190 seconds.


In [64]:
# model input
model_args = [
    SAMPLE.dist,
    SAMPLE.RI_partpooled,
    SAMPLE.sidx0,
    SAMPLE.sidx1,
    gidx
]

# pooled model
pooled_model_partpooled_data = pooled_logistic(*model_args[:2], **sample_kwargs)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [ùõΩ, ùõº]


Sampling 4 chains for 10_000 tune and 10_000 draw iterations (40_000 + 40_000 draws total) took 201 seconds.


### Run three datasets under unpooled model

In [65]:
# model input
model_args = [
    SAMPLE.dist,
    SAMPLE.RI_pooled,
    SAMPLE.sidx0,
    SAMPLE.sidx1,
    gidx
]

# unpooled model
unpooled_model_pooled_data = unpooled_logistic(*model_args[:4], **sample_kwargs)

Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [ùõΩ, ùõº, ùúì_offset, ùúì_std, ùúì_mean]


Sampling 4 chains for 10_000 tune and 10_000 draw iterations (40_000 + 40_000 draws total) took 8480 seconds.


In [66]:
# model input
model_args = [
    SAMPLE.dist,
    SAMPLE.RI_unpooled,
    SAMPLE.sidx0,
    SAMPLE.sidx1,
    gidx
]

# unpooled model
unpooled_model_unpooled_data = unpooled_logistic(*model_args[:4], **sample_kwargs)

Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [ùõΩ, ùõº, ùúì_offset, ùúì_std, ùúì_mean]


Sampling 4 chains for 10_000 tune and 10_000 draw iterations (40_000 + 40_000 draws total) took 9506 seconds.


In [None]:
# model input
model_args = [
    SAMPLE.dist,
    SAMPLE.RI_partpooled,
    SAMPLE.sidx0,
    SAMPLE.sidx1,
    gidx
]

# unpooled model
unpooled_model_partpooled_data = unpooled_logistic(*model_args[:4], **sample_kwargs)

Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [ùõΩ, ùõº, ùúì_offset, ùúì_std, ùúì_mean]


### Run three datasets under partpooled model

In [73]:
# model input
model_args = [
    SAMPLE.dist,
    SAMPLE.RI_pooled,
    SAMPLE.sidx0,
    SAMPLE.sidx1,
    gidx
]

# unpooled model
partpooled_model_pooled_data = partpooled_logistic(*model_args, **sample_kwargs)

Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [ùõº, ùõΩ, ùúì_offset, ùúì_std, ùúì_mean]


Sampling 4 chains for 10_000 tune and 10_000 draw iterations (40_000 + 40_000 draws total) took 3960 seconds.
The number of effective samples is smaller than 25% for some parameters.


In [74]:
# model input
model_args = [
    SAMPLE.dist,
    SAMPLE.RI_unpooled,
    SAMPLE.sidx0,
    SAMPLE.sidx1,
    gidx
]

# unpooled model
partpooled_model_unpooled_data = partpooled_logistic(*model_args, **sample_kwargs)

Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [ùõº, ùõΩ, ùúì_offset, ùúì_std, ùúì_mean]


Sampling 4 chains for 10_000 tune and 10_000 draw iterations (40_000 + 40_000 draws total) took 3824 seconds.
The number of effective samples is smaller than 25% for some parameters.


In [75]:
# model input
model_args = [
    SAMPLE.dist,
    SAMPLE.RI_partpooled,
    SAMPLE.sidx0,
    SAMPLE.sidx1,
    gidx
]

# unpooled model
partpooled_model_partpooled_data = partpooled_logistic(*model_args, **sample_kwargs)

Auto-assigning NUTS sampler...
Initializing NUTS using adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [ùõº, ùõΩ, ùúì_offset, ùúì_std, ùúì_mean]


Sampling 4 chains for 10_000 tune and 10_000 draw iterations (40_000 + 40_000 draws total) took 4000 seconds.


In [76]:
partpooled_model_partpooled_data['stats']

Unnamed: 0,mean,sd,hdi_3%,hdi_97%,mcse_mean,mcse_sd,ess_mean,ess_sd,ess_bulk,ess_tail,r_hat
ùúì_mean[0],2.536,2.332,-2.019,6.740,0.021,0.015,12428.0,12428.0,12434.0,19191.0,1.0
ùúì_mean[1],2.494,2.385,-2.009,6.977,0.021,0.015,12737.0,12737.0,12733.0,19914.0,1.0
ùúì_mean[2],-0.962,2.272,-5.258,3.268,0.021,0.015,12001.0,12001.0,12003.0,18209.0,1.0
ùúì_mean[3],-1.072,2.295,-5.270,3.349,0.021,0.015,12169.0,12169.0,12172.0,18197.0,1.0
ùúì_offset[0],0.012,0.942,-1.760,1.785,0.003,0.006,84133.0,12774.0,84259.0,25160.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...
logit[995],0.002,0.005,0.000,0.006,0.000,0.000,33140.0,33140.0,35555.0,31562.0,1.0
logit[996],0.003,0.013,0.000,0.010,0.000,0.000,32506.0,32506.0,47737.0,30822.0,1.0
logit[997],0.954,0.009,0.937,0.970,0.000,0.000,62841.0,62841.0,63406.0,25586.0,1.0
logit[998],0.954,0.009,0.937,0.970,0.000,0.000,62841.0,62841.0,63406.0,25586.0,1.0


In [77]:
toytrace(partpooled_model_partpooled_data['trace'], ['ùúì_mean', 'ùúì_offset', 'ùúì'], ['psi-mean', 'psi-offset', 'psi-spp']);

In [78]:
# show plot of TRUE vs. ESTIMATED rates
c, a, m = toyplot.scatterplot(
    partpooled_model_partpooled_data['trace']['ùúì'].mean(axis=0),         # estimated
    SPECIES_DATA['psi_x'],                             # true
    width=400,
    height=250,
    xlabel="ESTIMATED species velocity",
    ylabel="TRUE species velocity",
    color=[toyplot.color.Palette()[i] for i in SPECIES_DATA.gidx],
);

In [79]:
draw_velocity_dists(partpooled_model_partpooled_data['trace'], baseline = 1.2);

### Assess model fit

In [43]:
def rmse(predictions, targets):
    differences = predictions - targets                       #the DIFFERENCEs.
    differences_squared = differences ** 2                    #the SQUAREs of ^
    mean_of_differences_squared = differences_squared.mean()  #the MEAN of ^
    rmse_val = np.sqrt(mean_of_differences_squared)           #ROOT of ^
    return rmse_val   

In [44]:
def aicm(mean, variance):
    return 2*mean - 2*variance

In [45]:
rmse(unpooled_sub['trace']['ùúì'].mean(axis=0), SPECIES_DATA['psi'])

1.6829399085369496

In [46]:
rmse(partpooled_sub['trace']['ùúì'].mean(axis=0), SPECIES_DATA['psi_x'])

1.2960935050421183

In [47]:
aicm(unpooled_sub['trace']['ùúì'].mean(), unpooled_sub['trace']['ùúì'].var())

-24.07642765287162

In [48]:
# Raftery et al. (2007) holds that this value is better.  R+M disagree?
aicm(partpooled_sub['trace']['ùúì'].mean(), partpooled_sub['trace']['ùúì'].var())

-17.436591387294683

In [80]:
az_pooled_model_pooled_data = az.from_pymc3(trace = pooled_model_pooled_data['trace'], 
                                            model = pooled_model_pooled_data['model'])
az_pooled_model_unpooled_data = az.from_pymc3(trace = pooled_model_unpooled_data['trace'], 
                                            model = pooled_model_unpooled_data['model'])
az_pooled_model_partpooled_data = az.from_pymc3(trace = pooled_model_partpooled_data['trace'], 
                                            model = pooled_model_partpooled_data['model'])
az_unpooled_model_pooled_data = az.from_pymc3(trace = unpooled_model_pooled_data['trace'], 
                                            model = unpooled_model_pooled_data['model'])
az_unpooled_model_unpooled_data = az.from_pymc3(trace = unpooled_model_unpooled_data['trace'], 
                                            model = unpooled_model_unpooled_data['model'])
az_unpooled_model_partpooled_data = az.from_pymc3(trace = unpooled_model_partpooled_data['trace'], 
                                            model = unpooled_model_partpooled_data['model'])
az_partpooled_model_pooled_data = az.from_pymc3(trace = partpooled_model_pooled_data['trace'], 
                                            model = partpooled_model_pooled_data['model'])
az_partpooled_model_unpooled_data = az.from_pymc3(trace = partpooled_model_unpooled_data['trace'], 
                                            model = partpooled_model_unpooled_data['model'])
az_partpooled_model_partpooled_data = az.from_pymc3(trace = partpooled_model_partpooled_data['trace'], 
                                            model = partpooled_model_partpooled_data['model'])

In [81]:
az.compare({"pooled_model_pooled_data": az_pooled_model_pooled_data,
            "pooled_model_unpooled_data": az_pooled_model_unpooled_data, 
            "pooled_model_partpooled_data": az_pooled_model_partpooled_data})

The scale is now log by default. Use 'scale' argument or 'stats.ic_scale' rcParam if you rely on a specific value.
A higher log-score (or a lower deviance) indicates a model with better predictive accuracy.
  "\nThe scale is now log by default. Use 'scale' argument or "


Unnamed: 0,rank,loo,p_loo,d_loo,weight,se,dse,warning,loo_scale
pooled_model_pooled_data,0,-324.33,2.1962,0.0,0.90178,18.5939,0.0,False,log
pooled_model_unpooled_data,1,-358.191,2.30109,33.8604,0.0980357,18.8728,25.2834,False,log
pooled_model_partpooled_data,2,-390.841,2.82162,66.511,0.000184241,22.387,27.2619,False,log


In [82]:
az.compare({"unpooled_model_pooled_data": az_unpooled_model_pooled_data,
            "unpooled_model_unpooled_data": az_unpooled_model_unpooled_data, 
            "unpooled_model_partpooled_data": az_unpooled_model_partpooled_data})

The scale is now log by default. Use 'scale' argument or 'stats.ic_scale' rcParam if you rely on a specific value.
A higher log-score (or a lower deviance) indicates a model with better predictive accuracy.
  "\nThe scale is now log by default. Use 'scale' argument or "
  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "
  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "
  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "


Unnamed: 0,rank,loo,p_loo,d_loo,weight,se,dse,warning,loo_scale
unpooled_model_pooled_data,0,-320.521,35.3211,0.0,0.617966,19.2584,0.0,True,log
unpooled_model_partpooled_data,1,-329.843,53.7655,9.32147,0.326374,20.2225,24.3726,True,log
unpooled_model_unpooled_data,2,-350.928,43.6695,30.4068,0.0556603,18.5592,25.3953,True,log


In [83]:
az.compare({"partpooled_model_pooled_data": az_partpooled_model_pooled_data,
            "partpooled_model_unpooled_data": az_partpooled_model_unpooled_data, 
            "partpooled_model_partpooled_data": az_partpooled_model_partpooled_data})

The scale is now log by default. Use 'scale' argument or 'stats.ic_scale' rcParam if you rely on a specific value.
A higher log-score (or a lower deviance) indicates a model with better predictive accuracy.
  "\nThe scale is now log by default. Use 'scale' argument or "
  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "
  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "
  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "


Unnamed: 0,rank,loo,p_loo,d_loo,weight,se,dse,warning,loo_scale
partpooled_model_partpooled_data,0,-314.432,33.8041,0.0,0.582105,21.6858,0.0,True,log
partpooled_model_pooled_data,1,-320.324,34.4865,5.89155,0.401581,17.8145,24.7119,True,log
partpooled_model_unpooled_data,2,-356.786,51.575,42.354,0.0163138,18.8658,27.2717,True,log


In [84]:
az.compare({"pooled_model_pooled_data": az_pooled_model_pooled_data,
            "unpooled_model_unpooled_data": az_unpooled_model_unpooled_data, 
            "partpooled_model_partpooled_data": az_partpooled_model_partpooled_data})

The scale is now log by default. Use 'scale' argument or 'stats.ic_scale' rcParam if you rely on a specific value.
A higher log-score (or a lower deviance) indicates a model with better predictive accuracy.
  "\nThe scale is now log by default. Use 'scale' argument or "
  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "
  "Estimated shape parameter of Pareto distribution is greater than 0.7 for "


Unnamed: 0,rank,loo,p_loo,d_loo,weight,se,dse,warning,loo_scale
partpooled_model_partpooled_data,0,-314.432,33.8041,0.0,0.637551,19.7688,0.0,True,log
pooled_model_pooled_data,1,-324.33,2.1962,9.89803,0.333209,18.0489,24.7382,False,log
unpooled_model_unpooled_data,2,-350.928,43.6695,36.4957,0.0292403,18.5039,25.3989,True,log
