In [None]:
import pymc3 as pm

In [None]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import pandas as pd

import arviz as az
# Render inline figures in 'retina' (high-DPI) format.
%config InlineBackend.figure_format = 'retina'
# Switch matplotlib to the 'arviz-darkgrid' style sheet for every plot below.
az.style.use('arviz-darkgrid')

In [None]:
# NOTE(review): absolute, machine-specific path - point this at your own copy
# of Howell1.csv before running the notebook elsewhere.
data_path = r'D:\Bayes\resources\Rethinking\Data\Howell1.csv'
# Semicolon-delimited file whose first row holds the column names.
d = pd.read_csv(
    data_path,
    header=0,
    sep=';',
)
# Subset used by the first models: rows with age >= 18.
d2 = d.loc[d['age'] >= 18]

In [None]:
with pm.Model() as m4_1:
    mu = pm.Normal('mu', mu=178, sd=20)
    sigma = pm.Uniform('sigma', lower=0, upper=50)
    height = pm.Normal('height', mu=mu, sd=sigma, observed=d2.height)

In [None]:
with m4_1:
    trace_4_1 = pm.sample(1000, tune=1000)

In [None]:
az.plot_trace(trace_4_1)

In [None]:
az.summary(trace_4_1, credible_interval=0.89).round(2)

In [None]:
with pm.Model() as m4_1:
    mu = pm.Normal('mu', mu=178, sd=20, testval=d2.height.mean())
    sigma = pm.Uniform('sigma', lower=0, upper=50, testval=d2.height.std())
    height = pm.Normal('height', mu=mu, sd=sigma, observed=d2.height)
    trace_4_1 = pm.sample(1000, tune=1000)

In [None]:
az.plot_trace(trace_4_1)
az.summary(trace_4_1, credible_interval=0.89).round(2)

In [None]:
with pm.Model() as m4_2:
    mu = pm.Normal('mu', mu=178, sd=0.1, testval=d2.height.mean())
    sigma = pm.Uniform('sigma', lower=0, upper=50, testval=d2.height.std())
    height = pm.Normal('height', mu=mu, sd=sigma, observed=d2.height)
    trace_4_2 = pm.sample(1000, tune=1000)

In [None]:
az.plot_trace(trace_4_2)
az.summary(trace_4_2, credible_interval=0.89).round(2)

In [None]:
# 1000 draws each from Normal(178, 20) and Normal(178, 0.1).
prior1 = stats.norm.rvs(loc=178, scale=20, size=1000)
prior2 = stats.norm.rvs(loc=178, scale=0.1, size=1000)
# One KDE panel per set of draws, side by side.
_, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
az.plot_kde(prior1, ax=ax[0])
az.plot_kde(prior2, ax=ax[1])

In [None]:
# Convert the m4_1 trace to a DataFrame and show the covariance of its columns.
trace_df = pm.trace_to_dataframe(trace_4_1)
trace_df.cov()

In [None]:
# Diagonal of that covariance matrix, i.e. the per-column variances.
np.diag(trace_df.cov())

In [None]:
# Correlation matrix of the same columns.
trace_df.corr()

In [None]:
# First rows of the trace DataFrame.
trace_df.head()

In [None]:
# Posterior summary with an 89% interval, rounded to two decimals.
az.summary(trace_4_1, credible_interval=0.89).round(2)

In [None]:
# Scatter of the mu draws against the sigma draws; low alpha keeps overlaps readable.
plt.scatter(trace_4_1['mu'], trace_4_1['sigma'], alpha=0.1)

In [None]:
plt.plot(d2.height, d2.weight, '.')

In [None]:
with pm.Model() as m4_3:
    alpha = pm.Normal('alpha', mu=178, sd=100)
    beta = pm.Normal('beta', mu=0, sd=10)
    sigma = pm.Uniform('sigma', lower=0, upper=50)
    mu = alpha + beta * d2.weight
    height = pm.Normal('height', mu=mu, sd=sigma, observed=d2.height)
    trace_4_3 = pm.sample(1000, tune=1000)

In [None]:
az.plot_trace(trace_4_3)

In [None]:
az.summary(trace_4_3, credible_interval=0.89).round(2)

In [None]:
az.plot_kde(stats.norm.rvs(0.9, 0.04, 1000))

In [None]:
trace_df_4_3 = pm.trace_to_dataframe(trace_4_3)
trace_df_4_3.corr().round(2)

In [None]:
d2 = d2.assign(weight_c=pd.Series(d2.weight - d2.weight.mean()))

In [None]:
print(d2.weight.mean())
d2

In [None]:
with pm.Model() as m4_4:
    # Priors for intercept, slope and residual spread.
    alpha = pm.Normal('alpha', sd=100, mu=178)
    beta = pm.Normal('beta', sd=10, mu=0)
    sigma = pm.Uniform('sigma', upper=50, lower=0)
    # Linear predictor on the mean-centered weight column.
    mu = alpha + beta * d2['weight_c']
    height = pm.Normal(
        'height',
        mu=mu,
        sd=sigma,
        observed=d2['height'],
    )
    trace_4_4 = pm.sample(draws=1000, tune=1000)

In [None]:
# `credible_interval` is the mass *inside* the interval, so an 89% interval
# is requested with 0.89 (as for the other model summaries); the former .11
# was the leftover tail mass and produced an 11% interval instead.
az.summary(trace_4_4, credible_interval=0.89).round(2)

In [None]:
d2.height.mean()

In [None]:
trace_df = pm.trace_to_dataframe(trace_4_4)
trace_df.corr().round(2)

In [None]:
plt.plot(d2.weight, d2.height, '.')
plt.plot(d2.weight, trace_4_3['alpha'].mean() + trace_4_3['beta'].mean() * d2.weight)
plt.xlabel(d2.columns[1])
plt.ylabel(d2.columns[0])

In [None]:
trace_df_4_4 = pm.trace_to_dataframe(trace_4_4)
trace_df_4_4[:5]

In [None]:
N = [10, 50, 150, 352][3]
with pm.Model() as m_N:
    alpha = pm.Normal('alpha', mu=178, sd=100)
    beta = pm.Normal('beta', mu=0, sd=10)
    sigma = pm.Uniform('sigma', lower=0, upper=50)
    mu = pm.Deterministic('mu', alpha + beta * d2.weight[:N])
    height_hat = pm.Normal('height_hat', mu=mu, sd=sigma, observed=d2.height[:N])
    trace_N = pm.sample(1000, tune=1000)

In [None]:
chain_N = trace_N[100:]
az.plot_trace(chain_N, var_names='~mu')

In [None]:
# Observed points for the first N rows.
plt.plot(d2.weight[:N], d2.height[:N], 'C0o')
# chain_N['mu'] stacks the draws of every chain, while len(chain_N) only
# counts the draws of one chain (hence the `* nchains` used when sizing
# mu_pred further down). Index over the stacked array so that lines can be
# drawn from any chain rather than only the first.
mu_draws = chain_N['mu']
for i in range(20):
    idx = np.random.randint(len(mu_draws))
    # One regression line per randomly chosen posterior draw.
    plt.plot(d2.weight[:N], mu_draws[idx], 'C1-', alpha=0.25)
plt.xlabel(d2.columns[1])
plt.ylabel(d2.columns[0])

In [None]:
mu_at_50 = trace_N['alpha'] + trace_N['beta'] * (50 - np.mean(d2.weight_c))

In [None]:
az.plot_kde(mu_at_50)

In [None]:
az.hpd(mu_at_50, credible_interval=0.89).round(2)

In [None]:
df_trace_N = pm.trace_to_dataframe(chain_N).filter(regex=('mu.*'))
df_trace_N.head()

In [None]:
weight_seq = np.arange(25, 71)
chain_N_thin = chain_N[::10]
mu_pred = np.zeros((len(weight_seq), len(chain_N_thin) * chain_N_thin.nchains))
for i, w in enumerate(weight_seq):
    mu_pred[i] = chain_N_thin['alpha'] + chain_N_thin['beta'] * w

In [None]:
len(mu_pred)

In [None]:
plt.plot(weight_seq, mu_pred, 'C0.', alpha=0.1)
plt.xlabel('weight')
plt.ylabel('height')

In [None]:
mu_mean = mu_pred.mean(1)
mu_hpd = az.hpd(mu_pred.T, credible_interval=0.89)

In [None]:
plt.scatter(d2.weight[:N], d2.height[:N])
plt.plot(weight_seq, mu_mean, color='black')
az.plot_hpd(weight_seq, mu_pred.T)
plt.xlabel('weight')
plt.ylabel('height')
plt.xlim(d2.weight[:N].min(), d2.weight[:N].max())

In [None]:
height_pred = pm.sample_posterior_predictive(chain_N, 200, m_N)
az.hpd(height_pred['height_hat'])[:5]

In [None]:
plt.scatter(d2.weight[:N], d2.height[:N])
plt.plot(weight_seq, mu_mean, color='black')
az.plot_hpd(weight_seq, mu_pred.T)
az.plot_hpd(d2.weight[:N], height_pred['height_hat'])
plt.xlabel('weight')
plt.ylabel('height')
plt.xlim(d2.weight[:N].min(), d2.weight[:N].max())

In [None]:
# Simulate heights on weight_seq by hand: pick a posterior draw at random,
# build its regression line, then add Normal noise with that draw's sigma.
post_samples = []
# The per-variable arrays stack the draws of every chain, whereas
# len(chain_N) only counts the draws of a single chain; size the random index
# from the stacked arrays so draws from all chains can be selected.
alpha_draws = chain_N['alpha']
beta_draws = chain_N['beta']
sigma_draws = chain_N['sigma']
n_draws = len(alpha_draws)
for _ in range(1000): # number of samples from the posterior
    i = np.random.randint(n_draws)
    mu_pr = alpha_draws[i] + beta_draws[i] * weight_seq
    sigma_pred = sigma_draws[i]
    post_samples.append(np.random.normal(mu_pr, sigma_pred))

In [None]:
# Observed points for the first N rows.
plt.scatter(d2.weight[:N], d2.height[:N])
# Mean of mu at each weight in weight_seq.
plt.plot(weight_seq, mu_mean, color='black')
# HPD band of mu (mu_pred is transposed so each row is one draw).
az.plot_hpd(weight_seq, mu_pred.T)
# HPD band of the simulated heights collected in post_samples.
az.plot_hpd(weight_seq, np.array(post_samples))
plt.xlabel('weight')
plt.ylabel('height')
# Unlike the scatter at the top of this cell, the x-limits use the full d2.weight range, not only the first N rows.
plt.xlim(d2.weight.min(), d2.weight.max())

In [None]:
plt.scatter(d.weight, d.height, alpha=0.3)

In [None]:
d['weight_std'] = (d.weight - d.weight.mean()) / d.weight.std()
d['weight_std2'] = d.weight_std**2

In [None]:
with pm.Model() as m_4_5:
    alpha = pm.Normal('alpha', mu=178, sd=20)
    beta = pm.Normal('beta', mu=0, sd=1, shape=2)
    sigma = pm.Uniform('sigma', lower=0, upper=50)
    mu = pm.Deterministic('mu', alpha + beta[0] * d.weight_std + beta[1] * d.weight_std2)
    height = pm.Normal('height', mu=mu, sd=sigma, observed=d.height)
    trace_4_5 = pm.sample(1000, tune=1000)

In [None]:
az.plot_trace(trace_4_5, ['~mu'])

In [None]:
with pm.Model() as m_4_5_ln:
    alpha = pm.Normal('alpha', mu=178, sd=20)
    beta1 = pm.Lognormal('beta1', mu=0, sd=1)
    beta2 = pm.Normal('beta2', mu=0, sd=1)
    sigma = pm.Uniform('sigma', lower=0, upper=50)
    mu = pm.Deterministic('mu', alpha + beta1 * d.weight_std + beta2 * d.weight_std2)
    height = pm.Normal('height', mu=mu, sd=sigma, observed=d.height)
    trace_4_5_ln = pm.sample(1000, tune=1000)

In [None]:
az.plot_trace(trace_4_5_ln, ['~mu'])

In [None]:
az.summary(trace_4_5, ['~mu'], credible_interval=.89).round(2)

In [None]:
az.summary(trace_4_5_ln, ['~mu'], credible_interval=.89).round(2)

In [None]:
mu_pred = trace_4_5_ln['mu']
height_pred = pm.sample_ppc(trace_4_5_ln, 200, m_4_5_ln)

In [None]:
plt.scatter(d.weight_std, d.height, c='C0', alpha=0.3)
az.plot_hpd(d.weight_std, mu_pred, credible_interval=0.89)
az.plot_hpd(d.weight_std, height_pred['height'], credible_interval=0.89)

In [None]:
d['weight_std3'] = d.weight_std**3

In [None]:
with pm.Model() as m_4_6:
    alpha = pm.Normal('alpha', mu=178, sd=20)
    beta1 = pm.Normal('beta1', mu=0, sd=10)
    beta2 = pm.Normal('beta2', mu=0, sd=10)
    beta3 = pm.Normal('beta3', mu=0, sd=10)
    sigma = pm.Uniform('sigma', lower=0, upper=50)
    mu = pm.Deterministic('mu', alpha + beta1 * d.weight_std + beta2 * d.weight_std2 + beta3 * d.weight_std3)
    height = pm.Normal('height', mu=mu, sd=sigma, observed=d.height)
    trace_4_6 = pm.sample(1000, tune=1000)

In [None]:
mu_pred = trace_4_6['mu']
height_pred = pm.sample_posterior_predictive(trace_4_6, 200, model=m_4_6)

In [None]:
plt.scatter(d.weight_std, d.height, c='C0', alpha=0.3)
az.plot_hpd(d.weight_std, mu_pred, credible_interval=0.89)
az.plot_hpd(d.weight_std, height_pred['height'], credible_interval=0.89)
at = np.arange(-2, 3)
plt.xticks(at, np.round(at * d.weight.std() + d.weight.mean(), 1))

In [None]:
n = 1000
mu_n = stats.norm.rvs(loc=0, scale=10, size=n)
sigma_n = stats.uniform.rvs(loc=0, scale=10, size=n)
prior = stats.norm.rvs(loc=mu_n, scale=sigma_n)
az.plot_kde(prior)

In [None]:
n = 1000
alpha = stats.norm.rvs(loc=120, scale=10, size=n)
beta = stats.norm.rvs(loc=7, scale=2, size=n)
sigma = stats.uniform.rvs(loc=0, scale=15, size=n)
az.plot_kde(sigma)

In [None]:
with pm.Model() as h4h1:
    alpha = pm.Normal('alpha', mu=140, sd=30)
    beta = pm.Normal('beta', mu=0, sd=10)
    sigma = pm.Uniform('sigma', lower=0, upper=50)
    mu = pm.Deterministic('mu', alpha + beta * d.weight)
    height_hat = pm.Normal('height_hat', mu=mu, sd=sigma, observed=d.height)
    trace_h4h1 = pm.sample(1000, tune=2500)

In [None]:
az.plot_trace(trace_h4h1, ['~mu'])

In [None]:
trace_df_h4h1 = pm.trace_to_dataframe(trace_h4h1)

In [None]:
trace_df_h4h1.head()

In [None]:
az.summary(trace_h4h1, ['~mu'])

In [None]:
for i in [46.95, 43.72, 64.78, 32.59, 54.63]:
    height_pred = trace_h4h1['alpha'].mean() + trace_h4h1['beta'].mean() * i
    print(height_pred)

In [None]:
weights = [46.95, 43.72, 64.78, 32.59, 54.63]
mu_pred = np.zeros((len(weights), len(trace_h4h1) * trace_h4h1.nchains))
for i, w in enumerate(weights):
    mu_pred[i] = trace_h4h1['alpha'] + trace_h4h1['beta'] * w

In [None]:
mu_hpd = az.hpd(mu_pred.T, credible_interval=0.89)

In [None]:
mu_pred.mean(1).round(2)

In [None]:
mu_hpd.round(2)

In [None]:
d3 = d[d.age < 18]
len(d3)

In [None]:
with pm.Model() as h4h2:
    alpha = pm.Normal('alpha', mu=110, sd=30)
    beta = pm.Normal('beta', mu=0, sd=10)
    sigma = pm.Uniform('sigma', lower=0, upper=60)
    mu = pm.Deterministic('mu', alpha + beta * d3.weight)
    height_hat = pm.Normal('height_hat', mu=mu, sd=sigma, observed=d3.height)
    trace_h4h2 = pm.sample(1000, tune=1000)

In [None]:
az.summary(trace_h4h2, ['~mu'], credible_interval=0.89)

In [None]:
weight_seq = np.arange(int(d3.weight.min()), int(d3.weight.max()))
mu_pred = trace_h4h2['mu']
height_pred = pm.sample_posterior_predictive(trace_h4h2, model=h4h2)

In [None]:
len(mu_pred.mean(0))

In [None]:
plt.scatter(d3.weight, d3.height, alpha=0.3)
plt.plot(d3.weight, mu_pred.mean(0), color='black')
az.plot_hpd(d3.weight, mu_pred, credible_interval=0.89)
az.plot_hpd(d3.weight, height_pred['height_hat'], credible_interval=0.89)
plt.xlabel('weight')
plt.ylabel('height')

In [None]:
with pm.Model() as h4h3:
    alpha = pm.Normal('alpha', mu=178, sd=100)
    beta = pm.Normal('beta', mu=0, sd=100)
    sigma = pm.Uniform('sigma', lower=0, upper=50)
    mu = pm.Deterministic('mu', alpha + beta * np.log(d.weight))
    height = pm.Normal('height', mu=mu, sd=sigma, observed=d.height)
    trace_h4h3 = pm.sample(1000, tune=3000)

In [None]:
az.summary(trace_h4h3, ['~mu'], credible_interval=0.89)

In [None]:
weight_seq = np.arange(int(d.weight.min()), int(d.weight.max()))
mu_pred = trace_h4h3['mu']
height_pred = pm.sample_posterior_predictive(trace_h4h3, model=h4h3)

In [None]:
plt.scatter(d.weight, d.height, alpha=0.3)
az.plot_hpd(d.weight, height_pred['height'], credible_interval=0.97, color='yellow')
az.plot_hpd(d.weight, mu_pred, credible_interval=0.97)
plt.xlabel('weight')
plt.ylabel('height')