### IMPORTS

In [1]:
import pymc as pm
import numpy as np
import pandas as pd
import arviz as az



$\textbf{QUESTION 1}$

In [2]:
# Read the babies data set and pull its two measurement columns out as Series.
babies = pd.read_csv('babies.csv')
cord_clamped, not_clamped = babies['x'], babies['y']

In [3]:
# Descriptive statistics for the `x` column (cord_clamped).
cord_clamped_stats = cord_clamped.describe()
print(cord_clamped_stats)

count    16.000000
mean      9.643750
std       1.714631
min       8.000000
25%       8.350000
50%       9.150000
75%      10.300000
max      13.800000
Name: x, dtype: float64


In [4]:
# Descriptive statistics for the `y` column (not_clamped).
not_clamped_stats = not_clamped.describe()
print(not_clamped_stats)

count    16.00000
mean     12.09375
std       2.23591
min       8.20000
25%      11.00000
50%      12.05000
75%      13.52500
max      16.20000
Name: y, dtype: float64


In [5]:

with pm.Model() as model:

    # Vague Gamma(0.001, 0.001) priors on the shape (alpha) and rate (beta)
    # of each group's Gamma sampling distribution.
    shape_prior = 0.001
    rate_prior = 0.001
    alpha1 = pm.Gamma(name="alpha1", alpha=shape_prior, beta=rate_prior)
    beta1 = pm.Gamma(name="beta1", alpha=shape_prior, beta=rate_prior)
    alpha2 = pm.Gamma(name="alpha2", alpha=shape_prior, beta=rate_prior)
    beta2 = pm.Gamma(name="beta2", alpha=shape_prior, beta=rate_prior)

    # The mean of a Gamma(alpha, beta) distribution in the shape/rate form is
    # alpha / beta, so each group mean is derived from its parameters instead
    # of getting its own prior. Registering them as Deterministic keeps the
    # per-group means in the trace next to their difference.
    mean1 = pm.Deterministic(name="mean1", var=alpha1 / beta1)
    mean2 = pm.Deterministic(name="mean2", var=alpha2 / beta2)

    # Likelihoods for the data
    likelihood1 = pm.Gamma(name='likelihood1', alpha=alpha1, beta=beta1, observed=cord_clamped)
    likelihood2 = pm.Gamma(name='likelihood2', alpha=alpha2, beta=beta2, observed=not_clamped)

    # Difference in means (group 1 minus group 2)
    diff = mean1 - mean2
    diff_means = pm.Deterministic(name='diff_means', var=diff)

    # Sampling. A fixed seed makes the posterior draws reproducible under
    # Restart Kernel -> Run All.
    trace = pm.sample(draws=1000, tune=1000, target_accept=0.90, cores=None,
                      random_seed=42)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [alpha1, beta1, alpha2, beta2]


In [None]:
# Summarize the posterior of the difference in means with a 90% HDI.
diff_means_trace = az.summary(data=trace, var_names=['diff_means'], hdi_prob=.90)
print(f"Trace Summary for difference in means:\n{diff_means_trace}\n")

# Print trace summary for every variable in the model
print(f"Trace Summary:\n{az.summary(data=trace, hdi_prob=.90)}\n")

# Check Credible Set: with hdi_prob=.90 the summary reports the interval
# bounds in the "hdi_5%" and "hdi_95%" columns. Pull them out and test
# explicitly whether 0 lies outside the interval instead of re-printing the
# whole table.
hdi_low = diff_means_trace.loc["diff_means", "hdi_5%"]
hdi_high = diff_means_trace.loc["diff_means", "hdi_95%"]
excludes_zero = bool(hdi_low > 0 or hdi_high < 0)
print(f"The 90% Credible Set for the difference of means:\n[{hdi_low}, {hdi_high}]")
print(f"Credible set excludes 0: {excludes_zero}")

$$
\text{The 90\% credible set for the difference in means does not contain 0.} \Rightarrow \text{The difference in means is credibly nonzero at the 90\% level.}
$$

$\textbf{QUESTION 2}$

In [None]:
# Read the header-less spreadsheet, naming its two columns on load.
intraocular_pressure = pd.read_excel(
    'iop2.xlsx',
    header=None,
    names=['indicator', 'cornea_thickness'],
)
low_iop = intraocular_pressure['indicator']
corn_thickness = intraocular_pressure['cornea_thickness']

# Sample mean and standard deviation of the thickness column, reused when
# standardizing the covariate in the models below.
corn_mean, corn_std = corn_thickness.mean(), corn_thickness.std()

In [None]:
# Descriptive statistics for the indicator column.
low_iop_stats = low_iop.describe()
print(low_iop_stats)

In [None]:
# Descriptive statistics for the cornea thickness column.
corn_thickness_stats = corn_thickness.describe()
print(corn_thickness_stats)

In [None]:
# Borrow some code from Aaron's GitHub
def standardize(x, mu, sig):
    """Center ``x`` on ``mu`` and scale it by two times ``sig``.

    Args:
        x: Value(s) to transform (scalar or array-like supporting arithmetic).
        mu: Location subtracted from ``x``.
        sig: Spread; the centered value is divided by ``2 * sig``.

    Returns:
        ``(x - mu) / (2 * sig)``.
    """
    centered = x - mu
    scale = 2 * sig
    return centered / scale

$$
\textbf{PART A}
$$

In [None]:
# Borrow some code from Aaron's GitHub
# Logistic regression of the observed indicator on standardized cornea thickness.
with pm.Model() as mod_logistic:

    # Define x, y: the covariate is centered on its mean and scaled by two
    # standard deviations before entering the model.
    corn_standard = standardize(x=corn_thickness,
                                mu=corn_mean,
                                sig=corn_std)
    corn_data = pm.Data(name="corn_data", 
                        value=corn_standard, 
                        mutable=True)
    iop_data = pm.Data(name="iop_data", 
                       value=low_iop, 
                       mutable=False)

    # Normal priors on the intercept (alpha) and slope (beta)
    alpha = pm.Normal(name="alpha", 
                      mu=0, 
                      sigma=2)
    betas = pm.Normal(name="beta", 
                      mu=0, 
                      sigma=1)

    # Linear predictor mapped to a probability through the inverse logit
    logist = alpha + pm.math.dot(l=corn_data, 
                                 r=betas)
    p = pm.math.invlogit(logist)

    pm.Bernoulli(name="low_iop", 
                 p=p, 
                 observed=iop_data)

    # Fixed seed so the posterior draws are reproducible on re-run
    trace_log = pm.sample(draws=1000, 
                          tune=1000, 
                          cores=None, 
                          chains=4,
                          random_seed=42)

# Summarize the trace
print(az.summary(data=trace_log,
                 hdi_prob=.95))

# Get Predictions. This call sits outside the `with` block, so the model must
# be passed explicitly; otherwise there is no model on the context stack.
preds_log = pm.sample_posterior_predictive(trace=trace_log,
                                           model=mod_logistic,
                                           predictions=True,
                                           random_seed=42)
print(preds_log)

$$
\textbf{PART B}
$$

In [None]:
# Borrow some code from Aaron's GitHub
# Logistic regression used to estimate the success probability at a cornea
# thickness of 490.
with pm.Model() as mod_logistic_490:

    # Define x, y. The regression is fit on the full standardized covariate;
    # the value 490 is standardized with the same mean/std and is only used
    # for prediction. (Using the constant as the sole covariate would make
    # every observation share one linear predictor, so the data could not
    # inform the slope and alpha/beta would not be separately identified.)
    corn_standard = standardize(x=corn_thickness,
                                mu=corn_mean,
                                sig=corn_std)
    corn_490 = standardize(x=490,
                           mu=corn_mean,
                           sig=corn_std)
    corn_data = pm.Data(name="corn_data",
                        value=corn_standard,
                        mutable=True)
    iop_data = pm.Data(name="iop_data", 
                       value=low_iop, 
                       mutable=False)

    # Normal priors on the intercept (alpha) and slope (beta)
    alpha = pm.Normal(name="alpha", 
                      mu=0, 
                      sigma=2)
    betas = pm.Normal(name="beta", 
                      mu=0, 
                      sigma=1)

    # Linear predictor mapped to a probability through the inverse logit
    logist = alpha + pm.math.dot(l=corn_data, 
                                 r=betas)
    p = pm.math.invlogit(logist)

    pm.Bernoulli(name="low_iop",
                p=p,
                observed=iop_data)

    # Posterior of the success probability at thickness 490; it shows up as
    # the "p_490" row of the trace summary below.
    pm.Deterministic(name="p_490",
                     var=pm.math.invlogit(alpha + betas * corn_490))

    # Fixed seed so the posterior draws are reproducible on re-run
    trace_log490 = pm.sample(draws=1000,
                             tune=1000,
                             cores=None,
                             chains=4,
                             random_seed=42)

# Summarize the trace (includes p_490)
print(az.summary(data=trace_log490,
                 hdi_prob=.95))

# Get Predictions. This call sits outside the `with` block, so the model must
# be passed explicitly. These are posterior predictive draws of low_iop for
# the observed thickness values; the probability at 490 is the p_490 entry of
# the trace.
preds_log490 = pm.sample_posterior_predictive(trace=trace_log490,
                                              model=mod_logistic_490,
                                              predictions=True,
                                              random_seed=42)
print(preds_log490)

$$
\textbf{PART C}
$$

In [None]:
# Borrow some code from Aaron's GitHub
# Probit regression of the observed indicator on standardized cornea thickness.
with pm.Model() as mod_probit:

    # Define x, y: the covariate is centered on its mean and scaled by two
    # standard deviations before entering the model.
    corn_standard = standardize(x=corn_thickness,
                                mu=corn_mean,
                                sig=corn_std)
    corn_data = pm.Data(name="corn_data",
                        value=corn_standard,
                        mutable=True)
    iop_data = pm.Data(name="iop_data",
                        value=low_iop,
                        mutable=False)

    # Normal priors on the intercept (alpha) and slope (beta) of the probit
    # regression
    alpha = pm.Normal(name="alpha",
                      mu=0,
                      sigma=2)
    betas = pm.Normal(name="beta",
                      mu=0,
                      sigma=1)

    # Linear predictor mapped to a probability through the inverse probit
    logist = alpha + pm.math.dot(l=corn_data,
                                 r=betas)
    p = pm.math.invprobit(logist)

    pm.Bernoulli(name="low_iop",
                 p=p,
                 observed=iop_data)

    # Pointwise log-likelihood is stored so WAIC can be computed below.
    # Fixed seed so the posterior draws are reproducible on re-run.
    trace_prob = pm.sample(draws=1000,
                           tune=1000,
                           cores=None,
                           chains=4,
                           random_seed=42,
                           idata_kwargs=dict(log_likelihood=True))

# Summarize the trace
print(az.summary(data=trace_prob,
                 hdi_prob=.95))

# Get Predictions. This call sits outside the `with` block, so the model must
# be passed explicitly; otherwise there is no model on the context stack.
preds_probs = pm.sample_posterior_predictive(trace=trace_prob,
                                             model=mod_probit,
                                             predictions=True,
                                             random_seed=42)
print(preds_probs)

# View Deviances
print(az.waic(data=trace_prob,
              scale="deviance"))

In [None]:
# Borrow some code from Aaron's GitHub
# Probit regression used to estimate the success probability at a cornea
# thickness of 490.
with pm.Model() as mod_probit_490:

    # Define x, y. The regression is fit on the full standardized covariate;
    # the value 490 is standardized with the same mean/std and is only used
    # for prediction. (Using the constant as the sole covariate would make
    # every observation share one linear predictor, so the data could not
    # inform the slope and alpha/beta would not be separately identified.)
    corn_standard = standardize(x=corn_thickness,
                                mu=corn_mean,
                                sig=corn_std)
    corn_490 = standardize(x=490,
                           mu=corn_mean,
                           sig=corn_std)
    corn_data = pm.Data(name="corn_data",
                        value=corn_standard,
                        mutable=True)
    iop_data = pm.Data(name="iop_data",
                       value=low_iop,
                       mutable=False)

    # Normal priors on the intercept (alpha) and slope (beta) of the probit
    # regression
    alpha = pm.Normal(name="alpha",
                      mu=0,
                      sigma=2)
    betas = pm.Normal(name="beta",
                      mu=0,
                      sigma=1)

    # Linear predictor mapped to a probability through the inverse probit
    logist = alpha + pm.math.dot(l=corn_data,
                                 r=betas)
    p = pm.math.invprobit(logist)
    
    # `p` is already a probability, so it is passed as `p`. Passing it as
    # `logit_p` would apply a second (logistic) link on top of the probit one.
    pm.Bernoulli(name="low_iop",
                 p=p,
                 observed=iop_data)

    # Posterior of the success probability at thickness 490; it shows up as
    # the "p_490" row of the trace summary below.
    pm.Deterministic(name="p_490",
                     var=pm.math.invprobit(alpha + betas * corn_490))

    # Pointwise log-likelihood is stored so WAIC can be computed below.
    # Fixed seed so the posterior draws are reproducible on re-run.
    trace_prob490 = pm.sample(draws=1000,
                              tune=1000,
                              cores=None,
                              chains=4,
                              random_seed=42,
                              idata_kwargs=dict(log_likelihood=True))

# Summarize the trace (includes p_490)
print(az.summary(data=trace_prob490,
                 hdi_prob=.95))

# Get Predictions. This call sits outside the `with` block, so the model must
# be passed explicitly. These are posterior predictive draws of low_iop for
# the observed thickness values; the probability at 490 is the p_490 entry of
# the trace.
preds_probs490 = pm.sample_posterior_predictive(trace=trace_prob490,
                                                model=mod_probit_490,
                                                predictions=True,
                                                random_seed=42)
print(preds_probs490)

# View Deviances
print(az.waic(data=trace_prob490,
              scale="deviance"))

$\textbf{QUESTION 3}$

In [None]:
# Read the micronuclei data set and split its two columns into named Series.
micronuclei = pd.read_csv('micronuclei.csv')
rad_dose, freq = micronuclei['x'], micronuclei['y']

In [None]:
# Frequency table followed by descriptive statistics for the `x` column.
for rad_dose_table in (rad_dose.value_counts(), rad_dose.describe()):
    print(rad_dose_table)

In [None]:
# Frequency table followed by descriptive statistics for the `y` column.
for freq_table in (freq.value_counts(), freq.describe()):
    print(freq_table)

$$
\textbf{PART A}
$$

In [None]:
# with pm.Model() as mod_poisson:
    
#     # Priors for the regression coefficients
#     beta = pm.Normal('beta', mu=0, sigma=2)

#     # Expected value of the outcome (lambda parameter) using the log link function
#     mu = pm.math.exp(pm.math.dot(rad_dose, beta))

#     # Likelihood (sampling distribution) of observations
#     likelihood = pm.Poisson('likelihood', mu=mu, observed=freq)

#     # Use the No-U-Turn Sampler
#     trace = pm.sample(draws=1000, tune=1000, cores=None, chains=2)

# # Summarize the trace
# print(pm.summary(trace))

$$
\textbf{PART B}
$$