# Topic 4: Bayesian Hypothesis Testing and Model Averaging

## Learning Objectives
- Understand Bayesian hypothesis testing framework
- Master Bayes factors for model comparison
- Apply Bayesian model averaging
- Perform sensitivity analysis in model selection

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import pymc as pm
import arviz as az
from scipy.special import beta as beta_func

# Apply the 'seaborn-v0_8' matplotlib style sheet to every figure below
plt.style.use('seaborn-v0_8')
# Seed NumPy's global random number generator so simulated data are reproducible
np.random.seed(42)

## 1. Bayes Factors

### Definition:
$$BF_{12} = \frac{P(D|M_1)}{P(D|M_2)} = \frac{\text{Evidence for } M_1}{\text{Evidence for } M_2}$$

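### Interpretation (Jeffreys-style rule of thumb; see also Kass & Raftery, 1995):
- BF₁₂ > 10: Strong evidence for M₁
- 3 < BF₁₂ ≤ 10: Moderate evidence for M₁
- 1/3 ≤ BF₁₂ ≤ 3: Little or no evidence either way (BF₁₂ ≈ 1 means the data do not discriminate)
- 1/10 ≤ BF₁₂ < 1/3: Moderate evidence for M₂
- BF₁₂ < 1/10: Strong evidence for M₂

Note: Kass & Raftery (1995) themselves propose slightly different cut-offs (3, 20 and 150); the thresholds above are the simpler 3/10 convention used in the code below.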

In [None]:
# Bayes Factor example: Is a coin fair?
# H₀: p = 0.5 (fair coin)
# H₁: p ≠ 0.5 (unfair coin)

def coin_bayes_factor(heads, tails, alpha=1, beta=1):
    """
    Calculate the Bayes factor BF₀₁ for a fair vs. an unfair coin.

    H0: p = 0.5 (point hypothesis)
    H1: p ~ Beta(alpha, beta) (composite hypothesis)

    Parameters
    ----------
    heads, tails : int
        Observed numbers of heads and tails.
    alpha, beta : float
        Shape parameters of the Beta prior on p under H1.

    Returns
    -------
    bayes_factor : float
        BF₀₁ = P(D|H0) / P(D|H1); values above 1 favour the fair coin.
    likelihood_h0 : float
        Binomial probability of the data under p = 0.5.
    marginal_h1 : float
        Beta-Binomial marginal likelihood of the data under H1:
        C(n, heads) * B(heads + alpha, tails + beta) / B(alpha, beta).
    """
    # Function-scope import, matching the original cell's style.
    from scipy.special import betaln, gammaln

    n = heads + tails

    # Everything is computed on the log scale: for large n the binomial
    # coefficient overflows and the Beta function underflows, which made the
    # direct product evaluate to inf * 0 = nan.
    log_choose = gammaln(n + 1) - gammaln(heads + 1) - gammaln(tails + 1)

    # Likelihood under H0 (p = 0.5): C(n, heads) * 0.5**n
    log_likelihood_h0 = log_choose + n * np.log(0.5)

    # Marginal likelihood under H1 (Beta-Binomial)
    log_marginal_h1 = (log_choose
                       + betaln(heads + alpha, tails + beta)
                       - betaln(alpha, beta))

    # The binomial coefficient cancels in the ratio; subtracting the logs
    # keeps the Bayes factor finite even when both terms underflow.
    bayes_factor = np.exp(log_likelihood_h0 - log_marginal_h1)
    likelihood_h0 = np.exp(log_likelihood_h0)
    marginal_h1 = np.exp(log_marginal_h1)

    return bayes_factor, likelihood_h0, marginal_h1

# Scenarios to evaluate: (heads, tails, human-readable label)
scenarios = [
    (5, 5, "Balanced"),
    (7, 3, "Slightly biased"),
    (8, 2, "Moderately biased"),
    (9, 1, "Strongly biased"),
]


def _describe_evidence(bf01):
    """Translate a Bayes factor BF₀₁ into the verbal label shown in the table."""
    if bf01 > 3:
        return "For H₀ (fair)"
    if bf01 > 1:
        return "Weak for H₀"
    if bf01 > 1/3:
        return "Inconclusive"
    return "Against H₀ (unfair)"


# Table header
print("Bayes Factor Analysis: Fair vs Unfair Coin")
print("H₀: p = 0.5, H₁: p ~ Beta(1,1)")
print("\nScenario\t\tHeads/Tails\tBF₀₁\tEvidence")
print("-" * 55)

# Bayes factor for every scenario (kept in `bfs` for the plots further down)
bfs = [coin_bayes_factor(n_heads, n_tails)[0] for n_heads, n_tails, _ in scenarios]

# One table row per scenario
for (heads, tails, description), bf in zip(scenarios, bfs):
    evidence = _describe_evidence(bf)
    print(f"{description:<15}\t{heads}/{tails}\t\t{bf:.2f}\t{evidence}")

# Visualization: 2x2 figure, panels are filled in one after another
plt.figure(figsize=(12, 8))

# Panel 1: Bayes factor of each scenario on a log axis
plt.subplot(2, 2, 1)
scenarios_names = [label for _, _, label in scenarios]


def _bar_color(bf01):
    """Green above 3, orange between 1 and 3, red otherwise."""
    if bf01 > 3:
        return 'green'
    if bf01 > 1:
        return 'orange'
    return 'red'


colors = [_bar_color(value) for value in bfs]
bars = plt.bar(scenarios_names, bfs, color=colors, alpha=0.7)

# Horizontal reference lines: (level, colour, line style, legend text)
reference_lines = [
    (1, 'black', '--', 'No evidence'),
    (3, 'green', ':', 'Moderate evidence'),
    (1/3, 'red', ':', 'Evidence against'),
]
for level, line_color, line_style, legend_text in reference_lines:
    plt.axhline(level, color=line_color, linestyle=line_style, label=legend_text)

plt.ylabel('Bayes Factor (BF₀₁)')
plt.title('Bayes Factors for Fair Coin')
plt.xticks(rotation=45)
plt.legend()
plt.grid(True, alpha=0.3)
plt.yscale('log')

# Panel 2: posterior density of p under H1 (uniform Beta(1, 1) prior)
plt.subplot(2, 2, 2)
x = np.linspace(0, 1, 1000)
for heads, tails, description in scenarios:
    # Conjugate update: Beta(1 + heads, 1 + tails)
    density = stats.beta(1 + heads, 1 + tails).pdf(x)
    curve_label = f'{description} ({heads}H, {tails}T)'
    plt.plot(x, density, label=curve_label, linewidth=2, alpha=0.8)

# Mark the point hypothesis for comparison
plt.axvline(0.5, color='black', linestyle='--', label='H₀: p = 0.5')
plt.xlabel('p (probability of heads)')
plt.ylabel('Density')
plt.title('Posterior Distributions under H₁')
plt.legend()
plt.grid(True, alpha=0.3)

# Panel 3: Bayes factor recomputed after every additional flip
plt.subplot(2, 2, 3)

# Simulate a biased coin (the seed is reset so this panel is reproducible)
np.random.seed(42)
true_p = 0.7
n_flips = 30
flips = np.random.binomial(1, true_p, n_flips)

# Running number of heads after 1, 2, ..., n_flips flips
flip_counts = np.arange(1, n_flips + 1)
running_heads = np.cumsum(flips)
sequential_bfs = [
    coin_bayes_factor(n_heads, n_seen - n_heads)[0]
    for n_seen, n_heads in zip(flip_counts, running_heads)
]

plt.plot(flip_counts, sequential_bfs, 'b-', linewidth=2)
# Same reference levels as in the bar chart: 1, 3 and 1/3
for level, line_color, line_style in [(1, 'black', '--'),
                                      (3, 'green', ':'),
                                      (1/3, 'red', ':')]:
    plt.axhline(level, color=line_color, linestyle=line_style, alpha=0.7)
plt.xlabel('Number of Flips')
plt.ylabel('Bayes Factor (BF₀₁)')
plt.title(f'Sequential Evidence (true p = {true_p})')
plt.yscale('log')
plt.grid(True, alpha=0.3)

# Panel 4: posterior model probabilities after each flip
plt.subplot(2, 2, 4)
prior_h0 = 0.5  # Prior probability of H0
prior_odds = prior_h0 / (1 - prior_h0)

# posterior odds = Bayes factor * prior odds; probability = odds / (1 + odds)
posterior_odds_h0 = [bf01 * prior_odds for bf01 in sequential_bfs]
model_probs_h0 = [odds / (1 + odds) for odds in posterior_odds_h0]
model_probs_h1 = [1 - p_h0 for p_h0 in model_probs_h0]

flip_axis = range(1, n_flips + 1)
plt.plot(flip_axis, model_probs_h0, 'g-', linewidth=2, label='P(H₀|data)')
plt.plot(flip_axis, model_probs_h1, 'r-', linewidth=2, label='P(H₁|data)')
plt.axhline(0.5, color='black', linestyle='--', alpha=0.7)
plt.xlabel('Number of Flips')
plt.ylabel('Model Probability')
plt.title('Sequential Model Probabilities')
plt.legend()
plt.grid(True, alpha=0.3)

# Finish and display the 2x2 figure
plt.tight_layout()
plt.show()

## 2. Model Comparison with PyMC

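Compare polynomial regression models of increasing degree (linear, quadratic, cubic) by their estimated out-of-sample predictive accuracy. The code below uses WAIC; PSIS-LOO is the usual alternative and is obtained by passing `ic='loo'` to `az.compare`.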

In [None]:
# Generate synthetic data: quadratic trend 2 + 3x + 0.5x^2 plus Gaussian noise
np.random.seed(42)
n = 50
x = np.linspace(0, 1, n)
noise = np.random.normal(0, 0.3, n)
# NOTE: despite its name, `true_y` holds the noisy observations
true_y = 2 + 3*x + 0.5*x**2 + noise

# Quick look at the simulated observations
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(x, true_y, alpha=0.7, label='Data')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Synthetic Dataset')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()

# Sampler settings shared by the three models below.
# WAIC/LOO need the pointwise log-likelihood of every observation. Recent PyMC
# releases no longer store it by default, so it is requested explicitly here;
# without it the later az.compare call cannot compute WAIC.
sample_kwargs = dict(
    draws=1000,
    return_inferencedata=True,
    random_seed=42,
    idata_kwargs={"log_likelihood": True},
)

# Model 1: Linear  (mu = alpha + beta * x)
with pm.Model() as model_linear:
    # Priors: wide Normals on the coefficients, HalfNormal on the noise scale
    alpha = pm.Normal('alpha', 0, 10)
    beta = pm.Normal('beta', 0, 10)
    sigma = pm.HalfNormal('sigma', 1)

    # Linear model
    mu = alpha + beta * x

    # Likelihood
    y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=true_y)

    # Sample
    trace_linear = pm.sample(**sample_kwargs)

# Model 2: Quadratic  (mu = alpha + beta1 * x + beta2 * x^2)
with pm.Model() as model_quad:
    # Priors
    alpha = pm.Normal('alpha', 0, 10)
    beta1 = pm.Normal('beta1', 0, 10)
    beta2 = pm.Normal('beta2', 0, 10)
    sigma = pm.HalfNormal('sigma', 1)

    # Quadratic model
    mu = alpha + beta1 * x + beta2 * x**2

    # Likelihood
    y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=true_y)

    # Sample
    trace_quad = pm.sample(**sample_kwargs)

# Model 3: Cubic  (mu = alpha + beta1 * x + beta2 * x^2 + beta3 * x^3)
with pm.Model() as model_cubic:
    # Priors
    alpha = pm.Normal('alpha', 0, 10)
    beta1 = pm.Normal('beta1', 0, 10)
    beta2 = pm.Normal('beta2', 0, 10)
    beta3 = pm.Normal('beta3', 0, 10)
    sigma = pm.HalfNormal('sigma', 1)

    # Cubic model
    mu = alpha + beta1 * x + beta2 * x**2 + beta3 * x**3

    # Likelihood
    y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=true_y)

    # Sample
    trace_cubic = pm.sample(**sample_kwargs)

# Model comparison: display name -> sampling result of that model
models = dict(
    Linear=trace_linear,
    Quadratic=trace_quad,
    Cubic=trace_cubic,
)

# Rank the models by WAIC (only WAIC is computed here)
comparison = az.compare(models, ic='waic')
print("Model Comparison (WAIC):")
print(comparison)

# Plot the comparison table
az.plot_compare(comparison)
plt.title('Model Comparison')
plt.show()

# Posterior regression curves: one panel per model, data overlaid
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

x_pred = np.linspace(0, 1, 100)
model_names = ['Linear', 'Quadratic', 'Cubic']
traces = [trace_linear, trace_quad, trace_cubic]

# Per model: coefficient names in increasing polynomial order, and line format
curve_specs = {
    'Linear': (['alpha', 'beta'], 'b-'),
    'Quadratic': (['alpha', 'beta1', 'beta2'], 'r-'),
    'Cubic': (['alpha', 'beta1', 'beta2', 'beta3'], 'g-'),
}

for ax, name, trace in zip(axes, model_names, traces):
    ax.scatter(x, true_y, alpha=0.7, color='black', label='Data')

    coef_names, line_format = curve_specs[name]
    # Flatten the posterior draws of every coefficient into one 1-D array each
    draws = [trace.posterior[coef].values.flatten() for coef in coef_names]

    # Draw the mean curve implied by every 50th posterior sample
    for j in range(0, len(draws[0]), 50):
        y_pred = draws[0][j] + draws[1][j] * x_pred
        # Higher-order terms are added in increasing power, if the model has any
        for power, coef_draws in enumerate(draws[2:], start=2):
            y_pred = y_pred + coef_draws[j] * x_pred**power
        ax.plot(x_pred, y_pred, line_format, alpha=0.1)

    ax.set_title(f'{name} Model')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 3. Bayesian Model Averaging

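Instead of selecting one "best" model, average predictions across models weighted by their posterior probabilities. In this notebook the posterior model probabilities are approximated by Akaike-type weights computed from WAIC (often called pseudo-BMA weights), which avoids computing each model's marginal likelihood.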

In [None]:
# Bayesian Model Averaging example
# Convert WAIC to model weights

def waic_to_weights(waic_values):
    """
    Turn information-criterion values into normalized Akaike-type weights.

    Each model receives a weight proportional to exp(-0.5 * (value - best)),
    where `best` is the smallest value supplied, so the inputs are treated as
    "lower is better" (deviance-scale) quantities. The weights sum to one.
    """
    best = np.min(waic_values)
    # (best - value) / 2 is the same quantity as -0.5 * (value - best)
    relative_likelihood = np.exp((best - waic_values) / 2)
    return relative_likelihood / np.sum(relative_likelihood)

# Get WAIC values on the deviance scale (lower = better), which is what
# waic_to_weights expects.
# az.compare reports the criterion on the scale named in its scale column
# ('log' by default, i.e. ELPD where HIGHER is better). Passing those values
# through unchanged would hand the largest weight to the worst model.
bma_model_names = ['Linear', 'Quadratic', 'Cubic']

# Newer ArviZ releases call the column 'elpd_waic'; older ones used 'waic'.
ic_column = 'elpd_waic' if 'elpd_waic' in comparison.columns else 'waic'

# Same for the scale column ('scale' vs. 'waic_scale'); fall back to ArviZ's
# default log scale if neither is present.
if 'scale' in comparison.columns:
    ic_scale = comparison['scale'].iloc[0]
elif 'waic_scale' in comparison.columns:
    ic_scale = comparison['waic_scale'].iloc[0]
else:
    ic_scale = 'log'

# Multiplier that maps each ArviZ scale onto the deviance scale (-2 * ELPD)
to_deviance = {'log': -2.0, 'negative_log': 2.0, 'deviance': 1.0}[ic_scale]

waic_values = np.array([
    to_deviance * comparison.loc[model, ic_column] for model in bma_model_names
])
model_weights = waic_to_weights(waic_values)

print("Model Weights (based on WAIC):")
for name, weight in zip(bma_model_names, model_weights):
    print(f"{name}: {weight:.3f}")

# Bayesian Model Averaging prediction at a few new x values
x_new = np.array([0.25, 0.5, 0.75])  # New prediction points

print(f"\nBayesian Model Averaging Predictions:")
print(f"x\tLinear\tQuadratic\tCubic\tBMA")
print("-" * 45)


def _posterior_mean(trace, var_name):
    """Posterior mean of one parameter, averaged over all chains and draws."""
    return trace.posterior[var_name].mean().values


# Point estimates (posterior means) of every coefficient, computed once
lin_a = _posterior_mean(trace_linear, 'alpha')
lin_b = _posterior_mean(trace_linear, 'beta')

quad_a = _posterior_mean(trace_quad, 'alpha')
quad_b1 = _posterior_mean(trace_quad, 'beta1')
quad_b2 = _posterior_mean(trace_quad, 'beta2')

cub_a = _posterior_mean(trace_cubic, 'alpha')
cub_b1 = _posterior_mean(trace_cubic, 'beta1')
cub_b2 = _posterior_mean(trace_cubic, 'beta2')
cub_b3 = _posterior_mean(trace_cubic, 'beta3')

for x_val in x_new:
    # Individual model predictions (using posterior means)
    pred_linear = lin_a + lin_b * x_val
    pred_quad = quad_a + quad_b1 * x_val + quad_b2 * x_val**2
    pred_cubic = (cub_a + cub_b1 * x_val +
                  cub_b2 * x_val**2 + cub_b3 * x_val**3)

    # BMA prediction: weighted sum of the three model predictions
    predictions = np.array([pred_linear, pred_quad, pred_cubic])
    bma_pred = np.sum(model_weights * predictions)

    print(f"{x_val:.2f}\t{pred_linear:.2f}\t{pred_quad:.2f}\t\t{pred_cubic:.2f}\t{bma_pred:.2f}")

# Visualization of BMA: 2x2 figure
plt.figure(figsize=(12, 8))

# Panel 1: weight assigned to each model
plt.subplot(2, 2, 1)
bar_labels = ['Linear', 'Quadratic', 'Cubic']
bar_colors = ['blue', 'red', 'green']
plt.bar(bar_labels, model_weights, color=bar_colors, alpha=0.7)
plt.ylabel('Model Weight')
plt.title('Model Weights (WAIC-based)')
plt.grid(True, alpha=0.3)

# Panel 2: individual model curves and their weighted average
plt.subplot(2, 2, 2)
x_pred = np.linspace(0, 1, 100)


def _mean_of(trace, var_name):
    """Posterior mean of one parameter, averaged over all chains and draws."""
    return trace.posterior[var_name].mean().values


# Individual model predictions on the grid, using posterior-mean coefficients
pred_linear_full = (_mean_of(trace_linear, 'alpha') +
                    _mean_of(trace_linear, 'beta') * x_pred)

pred_quad_full = (_mean_of(trace_quad, 'alpha') +
                  _mean_of(trace_quad, 'beta1') * x_pred +
                  _mean_of(trace_quad, 'beta2') * x_pred**2)

pred_cubic_full = (_mean_of(trace_cubic, 'alpha') +
                   _mean_of(trace_cubic, 'beta1') * x_pred +
                   _mean_of(trace_cubic, 'beta2') * x_pred**2 +
                   _mean_of(trace_cubic, 'beta3') * x_pred**3)

# BMA prediction: weight-averaged curve
bma_pred_full = (model_weights[0] * pred_linear_full +
                 model_weights[1] * pred_quad_full +
                 model_weights[2] * pred_cubic_full)

plt.scatter(x, true_y, alpha=0.7, color='black', label='Data')

# (curve, line format, model name, model weight) for the dashed model lines
model_curves = [
    (pred_linear_full, 'b--', 'Linear', model_weights[0]),
    (pred_quad_full, 'r--', 'Quadratic', model_weights[1]),
    (pred_cubic_full, 'g--', 'Cubic', model_weights[2]),
]
for curve, line_format, curve_name, curve_weight in model_curves:
    plt.plot(x_pred, curve, line_format, alpha=0.7,
             label=f'{curve_name} (w={curve_weight:.2f})')
plt.plot(x_pred, bma_pred_full, 'k-', linewidth=3, label='BMA')

plt.xlabel('x')
plt.ylabel('y')
plt.title('Bayesian Model Averaging')
plt.legend()
plt.grid(True, alpha=0.3)

# Panel 3: spread of the model predictions around the BMA curve
plt.subplot(2, 2, 3)
# Weighted variance of the three model curves about the BMA prediction
# (between-model disagreement only)
pred_matrix = np.array([pred_linear_full, pred_quad_full, pred_cubic_full])
squared_deviation = (pred_matrix - bma_pred_full)**2
bma_variance = (model_weights[:, np.newaxis] * squared_deviation).sum(axis=0)
bma_std = np.sqrt(bma_variance)

plt.plot(x_pred, bma_std, 'purple', linewidth=2)
plt.xlabel('x')
plt.ylabel('Prediction Standard Deviation')
plt.title('Model Uncertainty')
plt.grid(True, alpha=0.3)

# Panel 4: how the model weights change as more observations are used
plt.subplot(2, 2, 4)
# Sample sizes 10, 15, ..., len(x). range() excludes its stop value, so the
# stop is len(x) + 1 to make sure the full data set is evaluated as well.
sample_sizes = range(10, len(x) + 1, 5)
weight_evolution = {'Linear': [], 'Quadratic': [], 'Cubic': []}
# Polynomial degree fitted for each model name
poly_degrees = {'Linear': 1, 'Quadratic': 2, 'Cubic': 3}

for n_sample in sample_sizes:
    # NOTE: x is an increasing grid, so the first n_sample points only cover
    # the left part of the x range rather than a random subsample.
    x_sub = x[:n_sample]
    y_sub = true_y[:n_sample]

    # Fit models to subset (simplified - least squares plus an AIC
    # approximation instead of re-running the sampler)
    aic_values = []
    for degree in poly_degrees.values():
        coefficients = np.polyfit(x_sub, y_sub, degree)
        residuals = y_sub - np.polyval(coefficients, x_sub)
        mse = np.mean(residuals**2)
        # AIC up to a constant: n * log(MSE) + 2 * (number of coefficients)
        aic_values.append(n_sample * np.log(mse) + 2 * (degree + 1))

    # Convert to weights (AIC is on the same lower-is-better scale)
    weights_sub = waic_to_weights(np.array(aic_values))
    for model_name, model_weight in zip(poly_degrees, weights_sub):
        weight_evolution[model_name].append(model_weight)

plt.plot(sample_sizes, weight_evolution['Linear'], 'b-', label='Linear')
plt.plot(sample_sizes, weight_evolution['Quadratic'], 'r-', label='Quadratic')
plt.plot(sample_sizes, weight_evolution['Cubic'], 'g-', label='Cubic')
plt.xlabel('Sample Size')
plt.ylabel('Model Weight')
plt.title('Evolution of Model Weights')
plt.legend()
plt.grid(True, alpha=0.3)

# Finish and display the 2x2 figure
plt.tight_layout()
plt.show()

## Key Takeaways

### Bayes Factors:
- **Direct evidence comparison** between models
- **Automatic Occam's razor** - penalizes complexity
- **Sensitive to priors** - especially for point hypotheses
- **Interpretation guidelines** help decision making

### Model Selection:
- **WAIC/LOO** provide practical approximations
- **Cross-validation** based approaches are robust
- **Posterior predictive checks** assess model adequacy

### Bayesian Model Averaging:
- **Accounts for model uncertainty**
- **Better calibrated predictions**
- **Robust to model selection uncertainty**
- **Computational overhead** but often worth it

## Next: Topic 5 - Regression Models