### IMPORTS

In [1]:
import pymc as pm
import numpy as np
import pandas as pd
import arviz as az

$\textbf{QUESTION 1}$

$$
\text{PART A}
$$

In [2]:
rabbits = pd.DataFrame(data={'time_after_injection': [24, 32, 48, 56, None,
                                                      70, 72, 75, 80, 96],
                             'temperature': [102.8, 104.5, 106.5, 107.0, 107.2, 105.1, 103.9, None,
                                             103.2, 102.1]})

rab_time = rabbits['time_after_injection']
rab_temp = rabbits['temperature']

In [3]:
# Get X mean/std
rab_time_mean = rab_time.mean()
rab_time_std = rab_time.std()

In [4]:
# Fix X missing values
rab_time = np.nan_to_num(rab_time, nan=-1)
rab_time = np.ma.masked_values(rab_time, value=-1)

# Fix y missing values
rab_temp = np.nan_to_num(rab_temp, nan=-1)
rab_temp = np.ma.masked_values(rab_temp, value=-1)

In [5]:
# SOME CODE BORROWED FROM AARON'S GH
with pm.Model() as rab:

    alpha = pm.Normal("alpha", mu=0, tau=0.001)
    beta = pm.Normal("beta", mu=0, tau=0.001)

    rab_time_imputed = pm.Normal("rab_time_imputed", mu=rab_time_mean, sigma=rab_time_std, observed=rab_time)

    mu = alpha + beta * rab_time_imputed
    pm.Normal("likelihood", mu=mu, tau=0.001, observed=rab_temp)

    rab_trace = pm.sample(draws=2000, tune=1000, target_accept=0.95)
    rab_pred = pm.sample_posterior_predictive(rab_trace)





In [6]:
az.summary(rab_trace, hdi_prob=0.90, var_names=['alpha', 'beta', 'rab_time_imputed', 'likelihood'])

  numba_fn = numba.jit(**self.kwargs)(self.function)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)


Unnamed: 0,mean,sd,hdi_5%,hdi_95%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
alpha,55.422,22.263,19.897,92.964,0.482,0.341,2134.0,2062.0,1.0
beta,0.717,0.367,0.128,1.335,0.008,0.006,2117.0,1723.0,1.0
rab_time_imputed[0],24.0,0.0,24.0,24.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[1],32.0,0.0,32.0,32.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[2],48.0,0.0,48.0,48.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[3],56.0,0.0,56.0,56.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[4],63.67,20.873,31.094,98.635,0.39,0.28,2890.0,2179.0,1.0
rab_time_imputed[5],70.0,0.0,70.0,70.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[6],72.0,0.0,72.0,72.0,0.0,0.0,4000.0,4000.0,
rab_time_imputed[7],75.0,0.0,75.0,75.0,0.0,0.0,4000.0,4000.0,


In [7]:
rab_temp_pred = rab_pred.posterior_predictive.stack(sample=("chain", "draw"))["likelihood"].values.T
az.r2_score(rab_temp, rab_temp_pred)

r2        0.525435
r2_std    0.037091
dtype: float64

$$
\text{The } R^2 ≈ 0.36.\\
\text{The 90\% Credible Set for the slope does contain 0.} \Rightarrow \\
\text{The difference is not statistically signifcant.}
$$

$$
\text{PART B}
$$

In [8]:
rabb_time = rabbits['time_after_injection']
rabb_temp = rabbits['temperature']

In [9]:
# Get X mean/std
rabb_time_mean = rabb_time.mean()
rabb_time_std = rabb_time.std()

In [10]:
# Fix X missing values
rabb_time = np.nan_to_num(rabb_time, nan=-1)
rabb_time = np.ma.masked_values(rabb_time, value=-1)

# Fix y missing values
rabb_temp = np.nan_to_num(rabb_temp, nan=-1)
rabb_temp = np.ma.masked_values(rabb_temp, value=-1)

In [11]:
# SOME CODE BORROWED FROM AARON'S GH
with pm.Model() as rabb:

    alphab = pm.Normal("alphab", mu=0, tau=0.001)
    betab1 = pm.Normal("betab1", mu=0, tau=0.001)
    betab2 = pm.Normal("betab2", mu=0, tau=0.001)

    rabb_time_imputed = pm.Normal("rabb_time_imputed", mu=rabb_time_mean, sigma=rabb_time_std, observed=rabb_time)
    rabb_time2_imputed = pm.Deterministic("rabb_time2_imputed", rabb_time_imputed ** 2)

    mu = alphab + (betab1 * rabb_time_imputed) + (betab2 * rabb_time2_imputed)
    pm.Normal("likelihoodb", mu=mu, tau=0.001, observed=rabb_temp)

    rabb_trace = pm.sample(draws=2000, tune=1000, target_accept=0.95)
    rabb_pred = pm.sample_posterior_predictive(rabb_trace)





In [12]:
az.summary(rabb_trace, hdi_prob=0.90, var_names=['alphab', 'betab1', 'betab2', 'rabb_time_imputed', 'rabb_time2_imputed', 'likelihoodb'])

  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)


Unnamed: 0,mean,sd,hdi_5%,hdi_95%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
alphab,15.711,29.047,-30.957,64.159,0.817,0.578,1268.0,2050.0,1.0
betab1,3.222,1.241,1.031,5.098,0.039,0.028,1019.0,1504.0,1.0
betab2,-0.026,0.012,-0.045,-0.005,0.0,0.0,1108.0,1618.0,1.0
rabb_time_imputed[0],24.0,0.0,24.0,24.0,0.0,0.0,4000.0,4000.0,
rabb_time_imputed[1],32.0,0.0,32.0,32.0,0.0,0.0,4000.0,4000.0,
rabb_time_imputed[2],48.0,0.0,48.0,48.0,0.0,0.0,4000.0,4000.0,
rabb_time_imputed[3],56.0,0.0,56.0,56.0,0.0,0.0,4000.0,4000.0,
rabb_time_imputed[4],60.842,20.058,28.084,92.836,0.405,0.293,2464.0,2722.0,1.0
rabb_time_imputed[5],70.0,0.0,70.0,70.0,0.0,0.0,4000.0,4000.0,
rabb_time_imputed[6],72.0,0.0,72.0,72.0,0.0,0.0,4000.0,4000.0,


In [13]:
rabb_temp_pred = rabb_pred.posterior_predictive.stack(sample=("chain", "draw"))["likelihoodb"].values.T
az.r2_score(rabb_temp, rabb_temp_pred)

r2        0.533998
r2_std    0.036911
dtype: float64

$\textbf{QUESTION 2}$

In [97]:
bladder = pd.read_csv("bladderc.csv")
print(f"{bladder.shape}\n\n{bladder.nunique()}")

(86, 3)

time        37
observed     2
group        2
dtype: int64


In [98]:
# SOME CODE BORROWED FROM AARON'S GH
time = bladder.iloc[:, 0].copy()
observed_censored = bladder.iloc[:, 1].copy()
group = bladder.iloc[:, 2].copy()

censored = observed_censored[observed_censored == 0]

# x data
time_uncensored = time[observed_censored == 1]
time_censored = time[observed_censored == 0]

# y data
group_uncensored = group[observed_censored == 1]
group_censored = group[observed_censored == 0]

$$
\text{PART A}
$$

In [103]:
# SOME CODE BORROWED FROM AARON'S GH
with pm.Model() as blad:

    alpha = pm.Exponential('alpha', 3)
    beta0 = pm.Normal(name='beta0', mu=0, tau=0.001)
    beta1 = pm.Normal(name='beta1', mu=0, tau=0.001)

    new_times_censored = pm.math.exp(beta0 + (beta1 * group_censored))
    beta_censored = new_times_censored ** ((-1 / alpha))

    new_times_uncensored = pm.math.exp(beta0 + (beta1 * group_uncensored))
    beta_uncensored = new_times_uncensored ** ((-1 / alpha))


    impute_censored = pm.Bound("impute_censored",
                               pm.Weibull.dist(alpha=alpha,
                                               beta=beta_censored),
                               lower=censored,
                               shape=censored.shape[0],)

    likelihood = pm.Weibull(name="likelihood",
                            alpha=alpha,
                            beta=beta_uncensored,
                            observed=time_uncensored,
                            shape=time_uncensored.shape[0],)

    # MEAN CALCULATIONS
    mu0 = pm.Deterministic(name='mu0', var=pm.math.exp(-beta0))
    mu1 = pm.Deterministic(name='mu1', var=pm.math.exp(-beta0 - beta1))

    mu1mu0 = pm.Deterministic(name="mu1-mu0", var=mu1 - mu0)

    blad_trace = pm.sample(draws=1000,
                           tune=2000,
                           target_accept=0.95,
                           init="auto",
                           chains=2,
                           cores=None)



In [104]:
az.summary(blad_trace, hdi_prob=0.95)

Unnamed: 0,mean,sd,hdi_2.5%,hdi_97.5%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
beta0,-2.285,0.346,-2.915,-1.572,0.013,0.009,724.0,1064.0,1.0
beta1,-0.059,0.296,-0.649,0.534,0.007,0.006,1954.0,1623.0,1.0
alpha,0.999,0.112,0.777,1.214,0.004,0.003,682.0,1079.0,1.0
impute_censored[0],10.044,10.651,0.006,31.422,0.202,0.161,1877.0,959.0,1.0
impute_censored[1],10.327,10.679,0.006,30.013,0.196,0.158,2331.0,1395.0,1.0
impute_censored[2],10.084,10.589,0.005,30.309,0.205,0.173,1893.0,813.0,1.0
impute_censored[3],10.143,10.663,0.001,30.826,0.208,0.172,2469.0,1286.0,1.0
impute_censored[4],9.548,9.923,0.0,29.468,0.209,0.174,1587.0,1121.0,1.0
impute_censored[5],10.381,10.551,0.004,31.161,0.187,0.154,2512.0,1096.0,1.0
impute_censored[6],10.046,10.421,0.001,31.706,0.198,0.166,2417.0,1034.0,1.0


$$
\text{The 95% Credible Set for } \mu_1 - \mu_0 \text{ is not all positive.} \\
\text{It ranges from } ~-5 \ to ~8
\\~\\
\text{PART B}
$$

$$
\text{PART C}\\
\text{Since the mean contain 0, it can be said that the difference is not significant.}\\
\text{The treatment can be said to not be beneficial.}
$$

$\textbf{QUESTION 3}$

In [139]:
concrete = pd.read_csv('concrete.csv', sep='\t')
print(f"{concrete.shape=}\n\nconcrete unqiue:\n{concrete.nunique()}")

concrete.shape=(1030, 9)

concrete unqiue:
x1    175
x2    119
x3     84
x4     74
x5    111
x6    178
x7    181
x8     15
y     847
dtype: int64


In [140]:
concreteX = concrete.drop(columns=['y']).to_numpy()
concreteY = concrete['y'].to_numpy()

In [141]:
# CODE BORROWED FROM AARON'S GH
concreteY = concreteY.repeat(3).reshape(1030, 3)

(1030, 3)


$$
\text{PART A}
$$

In [142]:
# SOME CODE BORROWED FROM AARON'S GH
with pm.Model() as conc:
    a = pm.Normal("a", 0, tau=0.00001, shape=9)
    b = pm.Normal("b", 0, tau=0.00001, shape=8)
    c = pm.Normal("c", 0, tau=0.00001, shape=8)
    tau = pm.Gamma("tau", 0.01, 0.01, shape=3)

    _mu = [a[0] + a[1] * concreteX[:, 0] + a[2] * concreteX[:, 1] + a[3] * concreteX[:, 2] + a[4] * concreteX[:, 3] + a[5] * concreteX[:, 4] + a[6] * concreteX[:, 5] + a[7] * concreteX[:, 6] + a[8] * concreteX[:, 7],
           b[0] + b[1] * concreteX[:, 0] + b[2] * concreteX[:, 1] + b[3] * concreteX[:, 2] + b[4] * concreteX[:, 3] + b[5] * concreteX[:, 4] + b[6] * concreteX[: , 6] + b[7] * concreteX[:, 7],
           c[0] + c[1] * concreteX[:, 0] + c[2] * concreteX[:, 1] + c[3] * concreteX[:, 2] + c[4] * concreteX[:, 3] + c[5] * concreteX[:, 4] + c[6] * concreteX[:, 5] + c[7] * concreteX[:, 7],]

    mu = pm.math.stack(_mu)
    pm.Normal("likelihood",
              mu=mu.T,
              tau=tau,
              observed=concreteY)

    concrete_trace = pm.sample(draws=1000, target_accept=0.95)

In [144]:
with conc:
    concrete_strength = pm.sample_posterior_predictive(concrete_trace)

In [145]:
concrete_pred = concrete_strength.posterior_predictive.stack(sample=("chain", "draw"))["likelihood"].values.T
az.r2_score(concreteY, concrete_pred)

ValueError: ignored

In [146]:
az.summary(concrete_trace, hdi_prob=0.90)

Unnamed: 0,mean,sd,hdi_5%,hdi_95%,mcse_mean,mcse_sd,ess_bulk,ess_tail,r_hat
a[0],39.567,20.748,6.142,73.69,0.792,0.567,685.0,1080.0,1.0
a[1],10.089,0.754,8.898,11.316,0.027,0.019,805.0,1437.0,1.0
a[2],8.564,0.898,7.058,10.032,0.03,0.022,889.0,1256.0,1.0
a[3],6.058,1.197,4.121,8.007,0.04,0.028,890.0,1646.0,1.0
a[4],-21.551,3.83,-28.143,-15.531,0.129,0.094,879.0,1392.0,1.0
a[5],2.409,1.175,0.621,4.537,0.033,0.023,1285.0,1483.0,1.0
a[6],-0.544,0.742,-1.707,0.673,0.026,0.018,827.0,1237.0,1.0
a[7],-0.43,0.879,-1.86,1.038,0.031,0.022,826.0,1359.0,1.0
a[8],0.11,0.007,0.098,0.121,0.0,0.0,2452.0,1253.0,1.0
b[0],26.775,10.244,9.626,42.757,0.335,0.239,934.0,1148.0,1.0


$$
\text{PART B}
$$

In [147]:
az.loo(concrete_trace, pointwise=True)

TypeError: ignored

$$
\text{PART C}
$$