### IMPORTS

In [1]:
import pymc as pm
import numpy as np
import pandas as pd
import arviz as az

$\textbf{QUESTION 1}$

$$
\text{PART A}
$$

In [2]:
rabbits = pd.DataFrame(data={'time_after_injection': [24, 32, 48, 56, None,
                                                      70, 72, 75, 80, 96],
                             'temperature': [102.8, 104.5, 106.5, 107.0, 107.2, 105.1, 103.9, None,
                                             103.2, 102.1]})

rab_time = rabbits['time_after_injection']
rab_temp = rabbits['temperature']

In [3]:
# Get X mean/std
rab_time_mean = rab_time.mean()
rab_time_std = rab_time.std()

In [4]:
# Fix X missing values
rab_time = np.nan_to_num(rab_time, nan=-1)
rab_time = np.ma.masked_values(rab_time, value=-1)

# Fix y missing values
rab_temp = np.nan_to_num(rab_temp, nan=-1)
rab_temp = np.ma.masked_values(rab_temp, value=-1)

In [5]:
with pm.Model() as rab:

    alpha = pm.Normal("alpha", mu=0, tau=0.001)
    beta = pm.Normal("beta", mu=0, tau=0.001)

    rab_time_imputed = pm.Normal("rab_time_imputed", mu=rab_time_mean, sigma=rab_time_std, observed=rab_time)

    mu = alpha + beta * rab_time_imputed
    pm.Normal("likelihood", mu=mu, tau=0.001, observed=rab_temp)

    rab_trace = pm.sample(draws=2000, tune=1000, target_accept=0.95)
    rab_pred = pm.sample_posterior_predictive(rab_trace)





In [6]:
az.summary(rab_trace, hdi_prob=0.90, var_names=['alpha', 'beta', 'rab_time_imputed', 'likelihood'])

  numba_fn = numba.jit(**self.kwargs)(self.function)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)


Unnamed: 0,mean,sd,hdi_5%,hdi_95%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
alpha,55.421,21.61,20.452,91.122,0.477,0.337,2067.0,2130.0,1.0
beta,0.716,0.354,0.123,1.273,0.008,0.006,2024.0,2168.0,1.0
rab_time_imputed[0],24.0,0.0,24.0,24.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[1],32.0,0.0,32.0,32.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[2],48.0,0.0,48.0,48.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[3],56.0,0.0,56.0,56.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[4],62.887,20.623,28.525,95.394,0.384,0.277,2870.0,2461.0,1.0
rab_time_imputed[5],70.0,0.0,70.0,70.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[6],72.0,0.0,72.0,72.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[7],75.0,0.0,75.0,75.0,0.0,0.0,4000.0,4000.0,


In [7]:
rab_temp_pred = rab_pred.posterior_predictive.stack(sample=("chain", "draw"))["likelihood"].values.T
az.r2_score(rab_temp, rab_temp_pred)

r2        0.525788
r2_std    0.039414
dtype: float64

$$
\text{The } R^2 ≈ 0.36.\\
\text{The 90\% Credible Set for the slope does contain 0.} \Rightarrow \\
\text{The difference is not statistically signifcant.}
$$

$$
\text{PART B}
$$

In [8]:
rabb_time = rabbits['time_after_injection']
rabb_temp = rabbits['temperature']

In [10]:
# Get X mean/std
rabb_time_mean = rabb_time.mean()
rabb_time_std = rabb_time.std()

In [11]:
# Fix X missing values
rabb_time = np.nan_to_num(rabb_time, nan=-1)
rabb_time = np.ma.masked_values(rabb_time, value=-1)

# Fix y missing values
rabb_temp = np.nan_to_num(rabb_temp, nan=-1)
rabb_temp = np.ma.masked_values(rabb_temp, value=-1)

In [15]:
with pm.Model() as rabb:

    alphab = pm.Normal("alphab", mu=0, tau=0.001)
    betab1 = pm.Normal("betab1", mu=0, tau=0.001)
    betab2 = pm.Normal("betab2", mu=0, tau=0.001)

    rabb_time_imputed = pm.Normal("rabb_time_imputed", mu=rabb_time_mean, sigma=rabb_time_std, observed=rabb_time)
    rabb_time2_imputed = pm.Deterministic("rabb_time2_imputed", rabb_time_imputed ** 2)

    mu = alphab + (betab1 * rabb_time_imputed) + (betab2 * rabb_time2_imputed)
    pm.Normal("likelihoodb", mu=mu, tau=0.001, observed=rabb_temp)

    rabb_trace = pm.sample(draws=2000, tune=1000, target_accept=0.95)
    rabb_pred = pm.sample_posterior_predictive(rabb_trace)





In [17]:
az.summary(rabb_trace, hdi_prob=0.90, var_names=['alphab', 'betab1', 'betab2', 'rabb_time_imputed', 'rabb_time2_imputed', 'likelihoodb'])

  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)


Unnamed: 0,mean,sd,hdi_5%,hdi_95%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
alphab,17.024,29.143,-31.746,62.963,0.77,0.551,1438.0,1732.0,1.0
betab1,3.135,1.242,1.084,5.129,0.036,0.026,1199.0,1412.0,1.0
betab2,-0.025,0.012,-0.045,-0.005,0.0,0.0,1276.0,1446.0,1.0
rabb_time_imputed[0],24.0,0.0,24.0,24.0,0.0,0.0,4000.0,4000.0,
rabb_time_imputed[1],32.0,0.0,32.0,32.0,0.0,0.0,4000.0,4000.0,
rabb_time_imputed[2],48.0,0.0,48.0,48.0,0.0,0.0,4000.0,4000.0,
rabb_time_imputed[3],56.0,0.0,56.0,56.0,0.0,0.0,4000.0,4000.0,
rabb_time_imputed[4],61.726,20.546,26.732,93.487,0.45,0.324,2104.0,2211.0,1.0
rabb_time_imputed[5],70.0,0.0,70.0,70.0,0.0,0.0,4000.0,4000.0,
rabb_time_imputed[6],72.0,0.0,72.0,72.0,0.0,0.0,4000.0,4000.0,


In [18]:
rabb_temp_pred = rabb_pred.posterior_predictive.stack(sample=("chain", "draw"))["likelihoodb"].values.T
az.r2_score(rabb_temp, rabb_temp_pred)

r2        0.521022
r2_std    0.047740
dtype: float64

$\textbf{QUESTION 2}$

$$
\text{PART A}
$$

$$
\text{PART B}
$$

$$
\text{PART C}
$$

$\textbf{QUESTION 3}$

$$
\text{PART A}
$$

$$
\text{PART B}
$$

$$
\text{PART C}
$$