### IMPORTS

In [1]:
import pymc as pm
import numpy as np
import pandas as pd
import arviz as az

$\textbf{QUESTION 1}$

$$
\text{PART A}
$$

In [2]:
rabbits = pd.DataFrame(data={'time_after_injection': [24, 32, 48, 56, None,
                                                      70, 72, 75, 80, 96],
                             'temperature': [102.8, 104.5, 106.5, 107.0, 107.2, 105.1, 103.9, None,
                                             103.2, 102.1]})

rab_time = rabbits['time_after_injection']
rab_temp = rabbits['temperature']

In [3]:
# Get X mean/std
rab_time_mean = rab_time.mean()
rab_time_std = rab_time.std()

# Get y mean/std
rab_temp_mean = rab_temp.mean()
rab_temp_std = rab_temp.std()

print(f"{rab_time_mean=}\n{rab_time_std=}\n{rab_temp_mean=}\n{rab_temp_std=}")

rab_time_mean=61.44444444444444
rab_time_std=23.415332109064302
rab_temp_mean=104.7
rab_temp_std=1.8788294228055948


In [4]:
# Fix X missing values
rab_time = np.nan_to_num(rab_time, nan=-1)
rab_time = np.ma.masked_values(rab_time, value=-1)

# Fix y missing values
rab_temp = np.nan_to_num(rab_temp, nan=-1)
rab_temp = np.ma.masked_values(rab_temp, value=-1)

In [17]:
print(f"{rab_time=}\n{rab_temp}")

rab_time=masked_array(data=[24.0, 32.0, 48.0, 56.0, --, 70.0, 72.0, 75.0, 80.0,
                   96.0],
             mask=[False, False, False, False,  True, False, False, False,
                   False, False],
       fill_value=-1.0)
[102.8 104.5 106.5 107.0 107.2 105.1 103.9 -- 103.2 102.1]


In [6]:
with pm.Model() as rab:

    alpha = pm.Normal("alpha", mu=0, tau=0.001)
    beta = pm.Normal("beta", mu=0, tau=0.001)

    rab_time_imputed = pm.Normal("rab_time_imputed", mu=rab_time_mean, sigma=rab_time_std, observed=rab_time)

    mu = alpha + beta * rab_time_imputed

    pm.Normal("likelihood", mu=rab_temp_mean, sigma=rab_temp_std, observed=rab_temp)

    rab_trace = pm.sample(draws=2000, tune=1000, target_accept=0.95)

    rab_pred = pm.sample_posterior_predictive(rab_trace)





In [7]:
az.summary(rab_trace, hdi_prob=0.90, var_names=['alpha', 'beta', 'rab_time_imputed', 'likelihood'])

  numba_fn = numba.jit(**self.kwargs)(self.function)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)


Unnamed: 0,mean,sd,hdi_5%,hdi_95%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
alpha,0.354,31.178,-49.984,51.408,0.429,0.526,5295.0,2889.0,1.0
beta,-0.125,31.41,-50.94,51.172,0.419,0.51,5648.0,2857.0,1.0
rab_time_imputed[0],24.0,0.0,24.0,24.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[1],32.0,0.0,32.0,32.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[2],48.0,0.0,48.0,48.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[3],56.0,0.0,56.0,56.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[4],61.178,23.99,20.917,99.226,0.354,0.28,4602.0,2233.0,1.0
rab_time_imputed[5],70.0,0.0,70.0,70.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[6],72.0,0.0,72.0,72.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[7],75.0,0.0,75.0,75.0,0.0,0.0,4000.0,4000.0,


In [8]:
rab_temp_pred = rab_pred.posterior_predictive.stack(sample=("chain", "draw"))["likelihood"].values.T
az.r2_score(rab_temp, rab_temp_pred)

r2        0.355498
r2_std    0.108543
dtype: float64

$$
\text{The } R^2 ≈ 0.36.\\
\text{The 90\% Credible Set for the slope does contain 0.} \Rightarrow \\
\text{The difference is not statistically signifcant.}
$$

$$
\text{PART B}
$$

In [10]:
rabb = rabbits.copy()
rabb['time_2'] = rabb['time_after_injection'] ** 2

rabb_time = rabb[['time_after_injection', 'time_2']]
rabb_temp = rabb['temperature']

In [12]:
# Get X mean/std
rabb_time_mean = rabb_time['time_after_injection'].mean()
rabb_time_std = rabb_time['time_after_injection'].std()

rabb_time2_mean = rabb_time['time_2'].mean()
rabb_time2_std = rabb_time['time_2'].std()

# Get y mean/std
rabb_temp_mean = rabb_temp.mean()
rabb_temp_std = rabb_temp.std()

print(f"{rab_time_mean=}\n{rab_time_std=}\n{rabb_time2_mean=}\n{rabb_time2_std=}\n{rab_temp_mean=}\n{rab_temp_std=}")

rab_time_mean=61.44444444444444
rab_time_std=23.415332109064302
rabb_time2_mean=4262.777777777777
rabb_time2_std=2770.400051336349
rab_temp_mean=104.7
rab_temp_std=1.8788294228055948


In [15]:
# Fix X missing values
rabb_time = np.nan_to_num(rabb_time, nan=-1)
rabb_time = np.ma.masked_values(rabb_time, value=-1)

# Fix y missing values
rabb_temp = np.nan_to_num(rabb_temp, nan=-1)
rabb_temp = np.ma.masked_values(rabb_temp, value=-1)

In [16]:
print(f"{rabb_time=}\n{rabb_temp}")

rabb_time=masked_array(
  data=[[24.0, 576.0],
        [32.0, 1024.0],
        [48.0, 2304.0],
        [56.0, 3136.0],
        [--, --],
        [70.0, 4900.0],
        [72.0, 5184.0],
        [75.0, 5625.0],
        [80.0, 6400.0],
        [96.0, 9216.0]],
  mask=[[False, False],
        [False, False],
        [False, False],
        [False, False],
        [ True,  True],
        [False, False],
        [False, False],
        [False, False],
        [False, False],
        [False, False]],
  fill_value=-1.0)
[102.8 104.5 106.5 107.0 107.2 105.1 103.9 -- 103.2 102.1]


In [None]:
with pm.Model() as rabb:

    alphab = pm.Normal("alphab", mu=0, tau=0.001)
    betab = pm.Normal("betab", mu=0, tau=0.001)

    rabb_time_imputed = pm.Normal("rabb_time_imputed", mu=rabb_time_mean, sigma=rabb_time_std, observed=rabb_time)

    mu = alphab + betab * rab_time_imputed

    pm.Normal("likelihoodb", mu=rabb_temp_mean, sigma=rabb_temp_std, observed=rabb_temp)

    rabb_trace = pm.sample(draws=2000, tune=1000, target_accept=0.95)

    rabb_pred = pm.sample_posterior_predictive(rabb_trace)

In [None]:
az.summary(rabb_trace, hdi_prob=0.90, var_names=['alphab', 'betab', 'rabb_time_imputed', 'likelihoodb'])

In [None]:
rabb_temp_pred = rabb_pred.posterior_predictive.stack(sample=("chain", "draw"))["likelihoodb"].values.T
az.r2_score(rabb_temp, rabb_temp_pred)