In [None]:
import os

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc3 as pm
import seaborn as sns
import sklearn.model_selection as skm
import statsmodels.formula.api as smf
from scipy import stats
# Ask the inline backend to render figures in 'retina' format.
%config InlineBackend.figure_format = 'retina'
# Apply the 'arviz-darkgrid' style sheet to the plots that follow.
az.style.use('arviz-darkgrid')

In [None]:
# Path of Howell1.csv. The HOWELL1_CSV environment variable overrides the
# location so the notebook can run on other machines; when it is unset the
# original local path is used.
howell1_csv = os.environ.get(
    'HOWELL1_CSV',
    r'D:\Bayes\resources\Rethinking\Data\Howell1.csv',
)
# The file is semicolon-delimited.
df = pd.read_csv(howell1_csv, sep=';')

In [None]:
# Standardize age in place: subtract the column mean, then divide by the
# column standard deviation (both taken from the column before it is replaced).
df['age'] = df['age'].sub(df['age'].mean()).div(df['age'].std())
# Distribution of the standardized ages.
sns.distplot(df['age'])

In [None]:
# Split the data into two equally sized random halves. The models below are
# fitted on d1. A fixed random_state keeps the split (and therefore every
# downstream fit and comparison) identical across kernel restarts.
d1, d2 = skm.train_test_split(df, test_size=0.5, random_state=42)

In [None]:
# Number of rows in the d1 split.
d1.shape[0]

In [None]:
# Average height in the d1 split.
d1['height'].mean()

In [None]:
# Degree-1 polynomial of height on (standardized) age, fitted to d1.
with pm.Model() as m6h1a:
    # Priors for the intercept and the linear coefficient.
    a = pm.Normal('a', mu=180, sd=50)
    b1 = pm.Normal('b1', mu=50, sd=20)
    # Expected height for each row of d1, recorded in the trace under 'm'.
    m = pm.Deterministic('m', a + b1 * d1.age)
    # Flat prior on the residual scale over [0, 50].
    s = pm.Uniform('s', lower=0, upper=50)
    # Gaussian likelihood of the observed heights.
    h = pm.Normal('h', mu=m, sd=s, observed=d1.height)
    trace_6h1a = pm.sample(draws=1000, tune=1000)

In [None]:
# Degree-2 polynomial of height on (standardized) age, fitted to d1.
with pm.Model() as m6h1b:
    # Priors for the intercept and the polynomial coefficients.
    a = pm.Normal('a', mu=180, sd=50)
    b1 = pm.Normal('b1', mu=50, sd=20)
    b2 = pm.Normal('b2', mu=50, sd=20)
    # Expected height for each row of d1, recorded in the trace under 'm'.
    m = pm.Deterministic(
        'm',
        a
        + b1 * d1.age
        + b2 * d1.age**2,
    )
    # Flat prior on the residual scale over [0, 50].
    s = pm.Uniform('s', lower=0, upper=50)
    # Gaussian likelihood of the observed heights.
    h = pm.Normal('h', mu=m, sd=s, observed=d1.height)
    trace_6h1b = pm.sample(draws=1000, tune=1000)

In [None]:
# Degree-3 polynomial of height on (standardized) age, fitted to d1.
with pm.Model() as m6h1c:
    # Priors for the intercept and the polynomial coefficients.
    a = pm.Normal('a', mu=180, sd=50)
    b1 = pm.Normal('b1', mu=50, sd=20)
    b2 = pm.Normal('b2', mu=50, sd=20)
    b3 = pm.Normal('b3', mu=50, sd=20)
    # Expected height for each row of d1, recorded in the trace under 'm'.
    m = pm.Deterministic(
        'm',
        a
        + b1 * d1.age
        + b2 * d1.age**2
        + b3 * d1.age**3,
    )
    # Flat prior on the residual scale over [0, 50].
    s = pm.Uniform('s', lower=0, upper=50)
    # Gaussian likelihood of the observed heights.
    h = pm.Normal('h', mu=m, sd=s, observed=d1.height)
    trace_6h1c = pm.sample(draws=1000, tune=1000)

In [None]:
# Degree-4 polynomial of height on (standardized) age, fitted to d1.
with pm.Model() as m6h1d:
    # Priors for the intercept and the polynomial coefficients.
    a = pm.Normal('a', mu=180, sd=50)
    b1 = pm.Normal('b1', mu=50, sd=20)
    b2 = pm.Normal('b2', mu=50, sd=20)
    b3 = pm.Normal('b3', mu=50, sd=20)
    b4 = pm.Normal('b4', mu=50, sd=20)
    # Expected height for each row of d1, recorded in the trace under 'm'.
    m = pm.Deterministic(
        'm',
        a
        + b1 * d1.age
        + b2 * d1.age**2
        + b3 * d1.age**3
        + b4 * d1.age**4,
    )
    # Flat prior on the residual scale over [0, 50].
    s = pm.Uniform('s', lower=0, upper=50)
    # Gaussian likelihood of the observed heights.
    h = pm.Normal('h', mu=m, sd=s, observed=d1.height)
    trace_6h1d = pm.sample(draws=1000, tune=1000)

In [None]:
# Degree-5 polynomial of height on (standardized) age, fitted to d1.
with pm.Model() as m6h1e:
    # Priors for the intercept and the polynomial coefficients.
    a = pm.Normal('a', mu=180, sd=50)
    b1 = pm.Normal('b1', mu=50, sd=20)
    b2 = pm.Normal('b2', mu=50, sd=20)
    b3 = pm.Normal('b3', mu=50, sd=20)
    b4 = pm.Normal('b4', mu=50, sd=20)
    b5 = pm.Normal('b5', mu=50, sd=20)
    # Expected height for each row of d1, recorded in the trace under 'm'.
    m = pm.Deterministic(
        'm',
        a
        + b1 * d1.age
        + b2 * d1.age**2
        + b3 * d1.age**3
        + b4 * d1.age**4
        + b5 * d1.age**5,
    )
    # Flat prior on the residual scale over [0, 50].
    s = pm.Uniform('s', lower=0, upper=50)
    # Gaussian likelihood of the observed heights.
    h = pm.Normal('h', mu=m, sd=s, observed=d1.height)
    trace_6h1e = pm.sample(draws=1000, tune=1000)

In [None]:
# Degree-6 polynomial of height on (standardized) age, fitted to d1.
with pm.Model() as m6h1f:
    # Priors for the intercept and the polynomial coefficients.
    a = pm.Normal('a', mu=180, sd=50)
    b1 = pm.Normal('b1', mu=50, sd=20)
    b2 = pm.Normal('b2', mu=50, sd=20)
    b3 = pm.Normal('b3', mu=50, sd=20)
    b4 = pm.Normal('b4', mu=50, sd=20)
    b5 = pm.Normal('b5', mu=50, sd=20)
    b6 = pm.Normal('b6', mu=50, sd=20)
    # Expected height for each row of d1, recorded in the trace under 'm'.
    m = pm.Deterministic(
        'm',
        a
        + b1 * d1.age
        + b2 * d1.age**2
        + b3 * d1.age**3
        + b4 * d1.age**4
        + b5 * d1.age**5
        + b6 * d1.age**6,
    )
    # Flat prior on the residual scale over [0, 50].
    s = pm.Uniform('s', lower=0, upper=50)
    # Gaussian likelihood of the observed heights.
    h = pm.Normal('h', mu=m, sd=s, observed=d1.height)
    trace_6h1f = pm.sample(draws=1000, tune=1000)

In [None]:
# Degree-1 polynomial of height on (standardized) age, fitted to d1, using the
# second set of priors (intercept centred on 140, coefficients centred on 0).
with pm.Model() as m6h1a2:
    # Priors for the intercept and the linear coefficient.
    a = pm.Normal('a', mu=140, sd=30)
    b1 = pm.Normal('b1', mu=0, sd=50)
    # Expected height for each row of d1, recorded in the trace under 'm'.
    m = pm.Deterministic('m', a + b1 * d1.age)
    # Residual scale. An unbounded Normal prior would allow negative values,
    # which are not a valid `sd` for the likelihood below, so the same
    # Normal(30, 10) shape is truncated at zero.
    s = pm.TruncatedNormal('s', mu=30, sd=10, lower=0)
    # Gaussian likelihood of the observed heights.
    h = pm.Normal('h', mu=m, sd=s, observed=d1.height)
    trace_6h1a2 = pm.sample(draws=1000, tune=1000)

In [None]:
# Degree-2 polynomial of height on (standardized) age, fitted to d1, using the
# second set of priors (intercept centred on 140, coefficients centred on 0).
with pm.Model() as m6h1b2:
    # Priors for the intercept and the polynomial coefficients.
    a = pm.Normal('a', mu=140, sd=30)
    b1 = pm.Normal('b1', mu=0, sd=50)
    b2 = pm.Normal('b2', mu=0, sd=50)
    # Expected height for each row of d1, recorded in the trace under 'm'.
    m = pm.Deterministic(
        'm',
        a
        + b1 * d1.age
        + b2 * d1.age**2,
    )
    # Residual scale. An unbounded Normal prior would allow negative values,
    # which are not a valid `sd` for the likelihood below, so the same
    # Normal(30, 10) shape is truncated at zero.
    s = pm.TruncatedNormal('s', mu=30, sd=10, lower=0)
    # Gaussian likelihood of the observed heights.
    h = pm.Normal('h', mu=m, sd=s, observed=d1.height)
    trace_6h1b2 = pm.sample(draws=1000, tune=1000)

In [None]:
# Degree-3 polynomial of height on (standardized) age, fitted to d1, using the
# second set of priors (intercept centred on 140, coefficients centred on 0).
with pm.Model() as m6h1c2:
    # Priors for the intercept and the polynomial coefficients.
    a = pm.Normal('a', mu=140, sd=30)
    b1 = pm.Normal('b1', mu=0, sd=50)
    b2 = pm.Normal('b2', mu=0, sd=50)
    b3 = pm.Normal('b3', mu=0, sd=50)
    # Expected height for each row of d1, recorded in the trace under 'm'.
    m = pm.Deterministic(
        'm',
        a
        + b1 * d1.age
        + b2 * d1.age**2
        + b3 * d1.age**3,
    )
    # Residual scale. An unbounded Normal prior would allow negative values,
    # which are not a valid `sd` for the likelihood below, so the same
    # Normal(30, 10) shape is truncated at zero.
    s = pm.TruncatedNormal('s', mu=30, sd=10, lower=0)
    # Gaussian likelihood of the observed heights.
    h = pm.Normal('h', mu=m, sd=s, observed=d1.height)
    trace_6h1c2 = pm.sample(draws=1000, tune=1000)

In [None]:
# Degree-4 polynomial of height on (standardized) age, fitted to d1, using the
# second set of priors (intercept centred on 140, coefficients centred on 0).
with pm.Model() as m6h1d2:
    # Priors for the intercept and the polynomial coefficients.
    a = pm.Normal('a', mu=140, sd=30)
    b1 = pm.Normal('b1', mu=0, sd=50)
    b2 = pm.Normal('b2', mu=0, sd=50)
    b3 = pm.Normal('b3', mu=0, sd=50)
    b4 = pm.Normal('b4', mu=0, sd=50)
    # Expected height for each row of d1, recorded in the trace under 'm'.
    m = pm.Deterministic(
        'm',
        a
        + b1 * d1.age
        + b2 * d1.age**2
        + b3 * d1.age**3
        + b4 * d1.age**4,
    )
    # Residual scale. An unbounded Normal prior would allow negative values,
    # which are not a valid `sd` for the likelihood below, so the same
    # Normal(30, 10) shape is truncated at zero.
    s = pm.TruncatedNormal('s', mu=30, sd=10, lower=0)
    # Gaussian likelihood of the observed heights.
    h = pm.Normal('h', mu=m, sd=s, observed=d1.height)
    trace_6h1d2 = pm.sample(draws=1000, tune=1000)

In [None]:
# Degree-5 polynomial of height on (standardized) age, fitted to d1, using the
# second set of priors (intercept centred on 140, coefficients centred on 0).
with pm.Model() as m6h1e2:
    # Priors for the intercept and the polynomial coefficients.
    a = pm.Normal('a', mu=140, sd=30)
    b1 = pm.Normal('b1', mu=0, sd=50)
    b2 = pm.Normal('b2', mu=0, sd=50)
    b3 = pm.Normal('b3', mu=0, sd=50)
    b4 = pm.Normal('b4', mu=0, sd=50)
    b5 = pm.Normal('b5', mu=0, sd=50)
    # Expected height for each row of d1, recorded in the trace under 'm'.
    m = pm.Deterministic(
        'm',
        a
        + b1 * d1.age
        + b2 * d1.age**2
        + b3 * d1.age**3
        + b4 * d1.age**4
        + b5 * d1.age**5,
    )
    # Residual scale. An unbounded Normal prior would allow negative values,
    # which are not a valid `sd` for the likelihood below, so the same
    # Normal(30, 10) shape is truncated at zero.
    s = pm.TruncatedNormal('s', mu=30, sd=10, lower=0)
    # Gaussian likelihood of the observed heights.
    h = pm.Normal('h', mu=m, sd=s, observed=d1.height)
    # This model uses more tuning steps and draws than the lower-degree ones.
    trace_6h1e2 = pm.sample(draws=2000, tune=2000)

In [None]:
# Degree-6 polynomial of height on (standardized) age, fitted to d1, using the
# second set of priors (intercept centred on 140, coefficients centred on 0).
with pm.Model() as m6h1f2:
    # Priors for the intercept and the polynomial coefficients.
    a = pm.Normal('a', mu=140, sd=30)
    b1 = pm.Normal('b1', mu=0, sd=50)
    b2 = pm.Normal('b2', mu=0, sd=50)
    b3 = pm.Normal('b3', mu=0, sd=50)
    b4 = pm.Normal('b4', mu=0, sd=50)
    b5 = pm.Normal('b5', mu=0, sd=50)
    b6 = pm.Normal('b6', mu=0, sd=50)
    # Expected height for each row of d1, recorded in the trace under 'm'.
    m = pm.Deterministic(
        'm',
        a
        + b1 * d1.age
        + b2 * d1.age**2
        + b3 * d1.age**3
        + b4 * d1.age**4
        + b5 * d1.age**5
        + b6 * d1.age**6,
    )
    # Residual scale. An unbounded Normal prior would allow negative values,
    # which are not a valid `sd` for the likelihood below, so the same
    # Normal(30, 10) shape is truncated at zero.
    s = pm.TruncatedNormal('s', mu=30, sd=10, lower=0)
    # Gaussian likelihood of the observed heights.
    h = pm.Normal('h', mu=m, sd=s, observed=d1.height)
    # This model uses more tuning steps and draws than the lower-degree ones.
    trace_6h1f2 = pm.sample(draws=2000, tune=2500)

In [None]:
# Give every first-set model a name equal to its variable name
# (presumably what pm.compare uses to label rows - confirm).
for _model, _label in (
    (m6h1a, 'm6h1a'),
    (m6h1b, 'm6h1b'),
    (m6h1c, 'm6h1c'),
    (m6h1d, 'm6h1d'),
    (m6h1e, 'm6h1e'),
    (m6h1f, 'm6h1f'),
):
    _model.name = _label

# Map each model to the trace sampled from it, in order of polynomial degree.
model_dict = {
    m6h1a: trace_6h1a,
    m6h1b: trace_6h1b,
    m6h1c: trace_6h1c,
    m6h1d: trace_6h1d,
    m6h1e: trace_6h1e,
    m6h1f: trace_6h1f,
}

In [None]:
# Give every second-set model a name equal to its variable name
# (presumably what pm.compare uses to label rows - confirm).
for _model, _label in (
    (m6h1a2, 'm6h1a2'),
    (m6h1b2, 'm6h1b2'),
    (m6h1c2, 'm6h1c2'),
    (m6h1d2, 'm6h1d2'),
    (m6h1e2, 'm6h1e2'),
    (m6h1f2, 'm6h1f2'),
):
    _model.name = _label

# Map each model to the trace sampled from it, in order of polynomial degree.
model_dict2 = {
    m6h1a2: trace_6h1a2,
    m6h1b2: trace_6h1b2,
    m6h1c2: trace_6h1c2,
    m6h1d2: trace_6h1d2,
    m6h1e2: trace_6h1e2,
    m6h1f2: trace_6h1f2,
}

In [None]:
# Compare the six second-set models; the result is only displayed as the
# cell output and is not stored in a variable.
pm.compare(model_dict2)

In [None]:
# Compare the six first-set models and keep the table: its `weight` column is
# read later when drawing the weighted posterior predictive samples.
comp = pm.compare(model_dict)
# Display the comparison table as the cell output.
comp

In [None]:
# Grid of 100 evenly spaced standardized-age values from -2 to 3 (inclusive),
# used as the x-axis for the posterior mean curves.
A_avg = np.linspace(start=-2, stop=3, num=100)

In [None]:
# Posterior mean height of the degree-1 model on the age grid.
# Broadcasting the (100, 1) grid against the per-draw parameter vectors gives
# one row per grid point and one column per posterior draw.
mu_pred = (
    trace_6h1a['a']
    + trace_6h1a['b1'] * A_avg[:, None]
)
# All observations (both splits) as a backdrop.
plt.scatter(x=df.age, y=df.height, alpha=0.3)
# Curve of the mean over draws, with a 97% HPD band around it.
plt.plot(A_avg, mu_pred.mean(axis=1), color='C0')
az.plot_hpd(A_avg, mu_pred.T, credible_interval=0.97)
# Same axis limits for every model plot: [xmin, xmax, ymin, ymax].
plt.axis([-2, 3, 40, 190])

In [None]:
# Posterior mean height of the degree-2 model on the age grid.
# Broadcasting the (100, 1) grid against the per-draw parameter vectors gives
# one row per grid point and one column per posterior draw.
mu_pred = (
    trace_6h1b['a']
    + trace_6h1b['b1'] * A_avg[:, None]
    + trace_6h1b['b2'] * A_avg[:, None] ** 2
)
# All observations (both splits) as a backdrop.
plt.scatter(x=df.age, y=df.height, alpha=0.3)
# Curve of the mean over draws, with a 97% HPD band around it.
plt.plot(A_avg, mu_pred.mean(axis=1), color='C0')
az.plot_hpd(A_avg, mu_pred.T, credible_interval=0.97)
# Same axis limits for every model plot: [xmin, xmax, ymin, ymax].
plt.axis([-2, 3, 40, 190])

In [None]:
# Posterior mean height of the degree-3 model on the age grid.
# Broadcasting the (100, 1) grid against the per-draw parameter vectors gives
# one row per grid point and one column per posterior draw.
mu_pred = (
    trace_6h1c['a']
    + trace_6h1c['b1'] * A_avg[:, None]
    + trace_6h1c['b2'] * A_avg[:, None] ** 2
    + trace_6h1c['b3'] * A_avg[:, None] ** 3
)
# All observations (both splits) as a backdrop.
plt.scatter(x=df.age, y=df.height, alpha=0.3)
# Curve of the mean over draws, with a 97% HPD band around it.
plt.plot(A_avg, mu_pred.mean(axis=1), color='C0')
az.plot_hpd(A_avg, mu_pred.T, credible_interval=0.97)
# Same axis limits for every model plot: [xmin, xmax, ymin, ymax].
plt.axis([-2, 3, 40, 190])

In [None]:
# Posterior mean height of the degree-4 model on the age grid.
# Broadcasting the (100, 1) grid against the per-draw parameter vectors gives
# one row per grid point and one column per posterior draw.
mu_pred = (
    trace_6h1d['a']
    + trace_6h1d['b1'] * A_avg[:, None]
    + trace_6h1d['b2'] * A_avg[:, None] ** 2
    + trace_6h1d['b3'] * A_avg[:, None] ** 3
    + trace_6h1d['b4'] * A_avg[:, None] ** 4
)
# All observations (both splits) as a backdrop.
plt.scatter(x=df.age, y=df.height, alpha=0.3)
# Curve of the mean over draws, with a 97% HPD band around it.
plt.plot(A_avg, mu_pred.mean(axis=1), color='C0')
az.plot_hpd(A_avg, mu_pred.T, credible_interval=0.97)
# Same axis limits for every model plot: [xmin, xmax, ymin, ymax].
plt.axis([-2, 3, 40, 190])

In [None]:
# Posterior mean height of the degree-5 model on the age grid.
# Broadcasting the (100, 1) grid against the per-draw parameter vectors gives
# one row per grid point and one column per posterior draw.
mu_pred = (
    trace_6h1e['a']
    + trace_6h1e['b1'] * A_avg[:, None]
    + trace_6h1e['b2'] * A_avg[:, None] ** 2
    + trace_6h1e['b3'] * A_avg[:, None] ** 3
    + trace_6h1e['b4'] * A_avg[:, None] ** 4
    + trace_6h1e['b5'] * A_avg[:, None] ** 5
)
# All observations (both splits) as a backdrop.
plt.scatter(x=df.age, y=df.height, alpha=0.3)
# Curve of the mean over draws, with a 97% HPD band around it.
plt.plot(A_avg, mu_pred.mean(axis=1), color='C0')
az.plot_hpd(A_avg, mu_pred.T, credible_interval=0.97)
# Same axis limits for every model plot: [xmin, xmax, ymin, ymax].
plt.axis([-2, 3, 40, 190])

In [None]:
# Posterior mean height of the degree-6 model on the age grid.
# Broadcasting the (100, 1) grid against the per-draw parameter vectors gives
# one row per grid point and one column per posterior draw.
mu_pred = (
    trace_6h1f['a']
    + trace_6h1f['b1'] * A_avg[:, None]
    + trace_6h1f['b2'] * A_avg[:, None] ** 2
    + trace_6h1f['b3'] * A_avg[:, None] ** 3
    + trace_6h1f['b4'] * A_avg[:, None] ** 4
    + trace_6h1f['b5'] * A_avg[:, None] ** 5
    + trace_6h1f['b6'] * A_avg[:, None] ** 6
)
# All observations (both splits) as a backdrop.
plt.scatter(x=df.age, y=df.height, alpha=0.3)
# Curve of the mean over draws, with a 97% HPD band around it.
plt.plot(A_avg, mu_pred.mean(axis=1), color='C0')
az.plot_hpd(A_avg, mu_pred.T, credible_interval=0.97)
# Same axis limits for every model plot: [xmin, xmax, ymin, ymax].
plt.axis([-2, 3, 40, 190])

In [None]:
# Re-creates the same 100-point standardized-age grid over [-2, 3] for the
# plots of the second model set.
A_avg = np.linspace(start=-2, stop=3, num=100)

In [None]:
# Posterior mean height of the degree-1 model (second prior set) on the age grid.
# Broadcasting the (100, 1) grid against the per-draw parameter vectors gives
# one row per grid point and one column per posterior draw.
mu_pred = (
    trace_6h1a2['a']
    + trace_6h1a2['b1'] * A_avg[:, None]
)
# All observations (both splits) as a backdrop.
plt.scatter(x=df.age, y=df.height, alpha=0.3)
# Curve of the mean over draws, with a 97% HPD band around it.
plt.plot(A_avg, mu_pred.mean(axis=1), color='C0')
az.plot_hpd(A_avg, mu_pred.T, credible_interval=0.97)
# Same axis limits for every model plot: [xmin, xmax, ymin, ymax].
plt.axis([-2, 3, 40, 190])

In [None]:
# Posterior mean height of the degree-2 model (second prior set) on the age grid.
# Broadcasting the (100, 1) grid against the per-draw parameter vectors gives
# one row per grid point and one column per posterior draw.
mu_pred = (
    trace_6h1b2['a']
    + trace_6h1b2['b1'] * A_avg[:, None]
    + trace_6h1b2['b2'] * A_avg[:, None] ** 2
)
# All observations (both splits) as a backdrop.
plt.scatter(x=df.age, y=df.height, alpha=0.3)
# Curve of the mean over draws, with a 97% HPD band around it.
plt.plot(A_avg, mu_pred.mean(axis=1), color='C0')
az.plot_hpd(A_avg, mu_pred.T, credible_interval=0.97)
# Same axis limits for every model plot: [xmin, xmax, ymin, ymax].
plt.axis([-2, 3, 40, 190])

In [None]:
# Posterior mean height of the degree-3 model (second prior set) on the age grid.
# Broadcasting the (100, 1) grid against the per-draw parameter vectors gives
# one row per grid point and one column per posterior draw.
mu_pred = (
    trace_6h1c2['a']
    + trace_6h1c2['b1'] * A_avg[:, None]
    + trace_6h1c2['b2'] * A_avg[:, None] ** 2
    + trace_6h1c2['b3'] * A_avg[:, None] ** 3
)
# All observations (both splits) as a backdrop.
plt.scatter(x=df.age, y=df.height, alpha=0.3)
# Curve of the mean over draws, with a 97% HPD band around it.
plt.plot(A_avg, mu_pred.mean(axis=1), color='C0')
az.plot_hpd(A_avg, mu_pred.T, credible_interval=0.97)
# Same axis limits for every model plot: [xmin, xmax, ymin, ymax].
plt.axis([-2, 3, 40, 190])

In [None]:
# Posterior mean height of the degree-4 model (second prior set) on the age grid.
# Broadcasting the (100, 1) grid against the per-draw parameter vectors gives
# one row per grid point and one column per posterior draw.
mu_pred = (
    trace_6h1d2['a']
    + trace_6h1d2['b1'] * A_avg[:, None]
    + trace_6h1d2['b2'] * A_avg[:, None] ** 2
    + trace_6h1d2['b3'] * A_avg[:, None] ** 3
    + trace_6h1d2['b4'] * A_avg[:, None] ** 4
)
# All observations (both splits) as a backdrop.
plt.scatter(x=df.age, y=df.height, alpha=0.3)
# Curve of the mean over draws, with a 97% HPD band around it.
plt.plot(A_avg, mu_pred.mean(axis=1), color='C0')
az.plot_hpd(A_avg, mu_pred.T, credible_interval=0.97)
# Same axis limits for every model plot: [xmin, xmax, ymin, ymax].
plt.axis([-2, 3, 40, 190])

In [None]:
# Posterior mean height of the degree-5 model (second prior set) on the age grid.
# Broadcasting the (100, 1) grid against the per-draw parameter vectors gives
# one row per grid point and one column per posterior draw.
mu_pred = (
    trace_6h1e2['a']
    + trace_6h1e2['b1'] * A_avg[:, None]
    + trace_6h1e2['b2'] * A_avg[:, None] ** 2
    + trace_6h1e2['b3'] * A_avg[:, None] ** 3
    + trace_6h1e2['b4'] * A_avg[:, None] ** 4
    + trace_6h1e2['b5'] * A_avg[:, None] ** 5
)
# All observations (both splits) as a backdrop.
plt.scatter(x=df.age, y=df.height, alpha=0.3)
# Curve of the mean over draws, with a 97% HPD band around it.
plt.plot(A_avg, mu_pred.mean(axis=1), color='C0')
az.plot_hpd(A_avg, mu_pred.T, credible_interval=0.97)
# Same axis limits for every model plot: [xmin, xmax, ymin, ymax].
plt.axis([-2, 3, 40, 190])

In [None]:
# Posterior mean height of the degree-6 model (second prior set) on the age grid.
# Broadcasting the (100, 1) grid against the per-draw parameter vectors gives
# one row per grid point and one column per posterior draw.
mu_pred = (
    trace_6h1f2['a']
    + trace_6h1f2['b1'] * A_avg[:, None]
    + trace_6h1f2['b2'] * A_avg[:, None] ** 2
    + trace_6h1f2['b3'] * A_avg[:, None] ** 3
    + trace_6h1f2['b4'] * A_avg[:, None] ** 4
    + trace_6h1f2['b5'] * A_avg[:, None] ** 5
    + trace_6h1f2['b6'] * A_avg[:, None] ** 6
)
# All observations (both splits) as a backdrop.
plt.scatter(x=df.age, y=df.height, alpha=0.3)
# Curve of the mean over draws, with a 97% HPD band around it.
plt.plot(A_avg, mu_pred.mean(axis=1), color='C0')
az.plot_hpd(A_avg, mu_pred.T, credible_interval=0.97)
# Same axis limits for every model plot: [xmin, xmax, ymin, ymax].
plt.axis([-2, 3, 40, 190])

In [None]:
# Weighted posterior predictive samples across the six first-set models.
# The comparison weights are re-sorted by index so that they line up with the
# order of the models and traces in model_dict
# (assumes the comparison table is indexed by model name - confirm).
ppc_w = pm.sample_posterior_predictive_w(
    traces=list(model_dict.values()),
    samples=1000,
    models=list(model_dict),
    weights=comp.weight.sort_index(ascending=True),
)

In [None]:
# 1000 posterior predictive samples from the degree-4 model alone.
ppc_d = pm.sample_posterior_predictive(
    trace=trace_6h1d,
    samples=1000,
    model=m6h1d,
)

In [None]:
# Posterior predictive draws of height from the degree-4 model
# (presumably shaped (draws, len(d1)) - confirm).
mu_pred_d = ppc_d['h']
# Plot the per-observation predictive mean against age. x and y are passed as
# separate arguments: a single list of (age, height) pairs would be read by
# matplotlib as a two-column array and drawn as two series against row index.
# Rows are sorted by age so the line runs left to right.
age_order = np.argsort(d1.age.values)
plt.plot(d1.age.values[age_order], mu_pred_d.mean(0)[age_order])

In [None]:
# Pair each d1 age with the posterior predictive mean height of that row.
# The mean is taken over axis 0 (the draws) so there is one value per
# observation; averaging over axis 1 would yield one value per draw, and zip
# would silently truncate and pair ages with unrelated numbers.
list(zip(d1.age, mu_pred_d.mean(0)))