# Omitted Variable Bias (OVB) - Regression Demo

## Data-Generating Process

### True Underlying Model

$$Y = \beta_0 + \beta_1 X + \beta_2 Z + \varepsilon$$

where:
- $Y$: continuous outcome
- $X$: observed explanatory variable  
- $Z$: omitted (latent) variable that also affects $Y$
- $\varepsilon \sim N(0, \sigma^2)$: random noise
- $\beta_0, \beta_1, \beta_2$: true coefficients

### Constructing Correlation Between X and Z

To control the degree of confounding, we define $X$ as a noisy version of $Z$:

$$X = \rho Z + \sqrt{1 - \rho^2} \cdot \nu$$

where:
- $\rho$ is the correlation between $X$ and $Z$ (ranging from 0 to 1)
- $\nu \sim N(0, 1)$ is independent noise

This guarantees that $\text{Corr}(X, Z) = \rho$.

### Expected Behavior

- When $Z$ strongly influences $Y$ and is correlated with $X$, omitting $Z$ biases the estimated effect of $X$
- As $\rho$ increases, the bias grows and the model becomes "confidently wrong": its confidence interval stays narrow but is centered on the wrong value
- Adding more data reduces variance but **not** the bias itself

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm

# Notebook-wide plotting defaults: seaborn white-grid theme first, then the
# default figure size on top of it.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({'figure.figsize': (10, 6)})

## Data Generating Function

In [None]:
def generate_toy_regression_ovb(
    n_train: int = 1000,
    train_range: tuple = (0.0, 10.0),
    grid_points: int = 1000,
    noise_type: str = 'heteroscedastic',
    func_type: str = 'linear',
    rho: float = 0.7,
    beta2: float = 1.0,
    seed: int = 42
):
    """
    Generate toy regression data with OVB potential.

    Extends the standard regression DGP with a correlated latent variable Z.
    Model: Y = f(X) + beta2*Z + epsilon
    Where X and Z are correlated via rho.

    Z is drawn from N(0, 1). A unit-variance variable
    rho*Z + sqrt(1 - rho^2)*nu is then shifted and scaled so that the
    returned X is centred on the midpoint of ``train_range`` with standard
    deviation (max - min) / 4. The affine rescaling keeps Corr(X, Z) = rho
    but means X does NOT have unit variance, and X is not clipped, so some
    samples fall outside ``train_range``.

    Parameters:
    -----------
    n_train : int
        Number of training samples
    train_range : tuple
        (min, max) used to centre/scale X and as the end points of the
        evaluation grid
    grid_points : int
        Number of grid points for evaluation
    noise_type : str
        'homoscedastic' (sigma = 1) or 'heteroscedastic'
        (sigma = |2.5 * sin(0.5 * X + 5)|)
    func_type : str
        'linear' (f(x) = 0.7x + 0.5) or 'sin' (f(x) = x*sin(x) + x)
    rho : float
        Correlation between X and Z (controls confounding strength);
        must lie in [-1, 1]
    beta2 : float
        Effect of omitted variable Z on Y
    seed : int
        Random seed for reproducibility

    Returns:
    --------
    X, Z, Y, x_grid, y_grid_clean : tuple
        Training data (rescaled X, latent Z, outcome Y), the evaluation
        grid, and the noise-free f evaluated on that grid

    Raises:
    -------
    ValueError
        If rho is outside [-1, 1] (or NaN), or if func_type / noise_type
        is not one of the supported options.
    """
    # Reject impossible correlations up front: sqrt(1 - rho^2) would otherwise
    # quietly turn every sample into NaN. The negated chained comparison also
    # catches NaN.
    if not -1.0 <= rho <= 1.0:
        raise ValueError("rho must be in [-1, 1]")

    # Define the function f(x)
    if func_type == 'linear':
        f_clean = lambda x: 0.7 * x + 0.5
    elif func_type == 'sin':
        f_clean = lambda x: x * np.sin(x) + x
    else:
        raise ValueError("func_type must be 'linear' or 'sin'")

    if noise_type not in ('homoscedastic', 'heteroscedastic'):
        raise ValueError("noise_type must be 'homoscedastic' or 'heteroscedastic'")

    rng = np.random.default_rng(seed)

    # Draw order (Z, nu, epsilon) is fixed so a given seed reproduces the
    # same dataset.
    # Generate latent Z ~ N(0, 1)
    Z = rng.standard_normal(n_train)

    # Generate X correlated with Z: X = rho*Z + sqrt(1-rho^2)*nu
    # This ensures Corr(X, Z) = rho
    nu = rng.standard_normal(n_train)
    X = rho * Z + np.sqrt(1 - rho**2) * nu

    # Shift and scale from ~N(0, 1): the range width spans four standard
    # deviations around the midpoint.
    lo, hi = train_range[0], train_range[1]
    X_scaled = X * (hi - lo) / 4 + (lo + hi) / 2

    # Compute clean function output
    y_clean = f_clean(X_scaled)

    # Generate noise epsilon
    if noise_type == 'homoscedastic':
        sigma = 1.0
        epsilon = rng.normal(0, sigma, n_train)
    else:
        # Per-sample noise scale depends on X; one draw per element of sigma.
        sigma = np.abs(2.5 * np.sin(0.5 * X_scaled + 5))
        epsilon = rng.normal(0, sigma)

    # Generate Y with OVB structure: Y = f(X) + beta2*Z + epsilon
    Y = y_clean + beta2 * Z + epsilon

    # Create evaluation grid
    x_grid = np.linspace(lo, hi, grid_points)
    y_grid_clean = f_clean(x_grid)

    return X_scaled, Z, Y, x_grid, y_grid_clean

## Configuration

In [None]:
# Settings for the Part 1 regression demo; forwarded as keyword arguments to
# the data generator.
cfg = dict(
    n_train=1000,
    train_range=(0.0, 10.0),
    grid_points=1000,
    noise_type="heteroscedastic",  # 'homoscedastic' or 'heteroscedastic'
    func_type="linear",            # 'linear' or 'sin'
    rho=0.7,                       # Correlation between X and Z
    beta2=1.0,                     # Effect of omitted Z on Y
    seed=42,
)

# Echo the settings that matter for interpreting the results.
print("Experiment Parameters:")
print("\n".join(
    f"  {label}: {cfg[key]}"
    for label, key in (
        ("n_train", "n_train"),
        ("train_range", "train_range"),
        ("noise_type", "noise_type"),
        ("func_type", "func_type"),
        ("rho (X-Z correlation)", "rho"),
        ("beta2 (effect of Z)", "beta2"),
    )
))

## Generate Data and Visualize

In [None]:
# Draw one dataset from the OVB generator using the notebook configuration.
X, Z, Y, x_grid, y_grid_clean = generate_toy_regression_ovb(**cfg)

# Sanity check: the sample correlation should sit close to the requested rho.
empirical_corr = np.corrcoef(X, Z)[0, 1]
print(f"Empirical correlation between X and Z: {empirical_corr:.4f} (target: {cfg['rho']})")
print(f"Function type: {cfg['func_type']}, Noise type: {cfg['noise_type']}")

# 2x2 overview figure; unpack the grid row by row.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
panel_xy, panel_zy = axes[0]
panel_xz, panel_zhist = axes[1]

# Top-left: observed data against the noise-free f(x).
panel_xy.scatter(X, Y, alpha=0.5, s=10, label='Data')
panel_xy.plot(x_grid, y_grid_clean, 'r-', linewidth=2, label='f(x) clean')
panel_xy.set(xlabel='X (Observed)', ylabel='Y (Outcome)', title='X vs Y')
panel_xy.legend()

# Top-right: the omitted variable against the outcome.
panel_zy.scatter(Z, Y, alpha=0.5, s=10, color='orange')
panel_zy.set(xlabel='Z (Omitted/Latent)', ylabel='Y (Outcome)', title='Z vs Y')

# Bottom-left: the confounding relationship itself.
panel_xz.scatter(X, Z, alpha=0.5, s=10, color='green')
panel_xz.set(
    xlabel='X (Observed)',
    ylabel='Z (Omitted/Latent)',
    title=f'X vs Z (Corr = {empirical_corr:.3f})',
)

# Bottom-right: marginal distribution of the latent variable.
panel_zhist.hist(Z, bins=30, alpha=0.7, color='purple', edgecolor='black')
panel_zhist.set(
    xlabel='Z (Omitted/Latent)',
    ylabel='Frequency',
    title='Distribution of Z ~ N(0, 1)',
)

plt.tight_layout()
plt.show()

## Biased Regression (Omitting Z)

When we regress Y on X only (omitting Z), we get a biased estimate of $\beta_1$.

In [None]:
# Reference slope: f(x) = 0.7*x + 0.5 in the linear case; the 'sin' variant has
# no single true coefficient, so it is marked as None.
true_beta1 = None
if cfg['func_type'] == 'linear':
    true_beta1 = 0.7

# Misspecified regression: intercept + X only, Z left out.
X_with_const = sm.add_constant(X)
model_biased = sm.OLS(Y, X_with_const).fit()

print("=" * 60, "BIASED MODEL: Y ~ X (Z omitted)", "=" * 60, sep="\n")
print(model_biased.summary())

print("", "=" * 60, "COEFFICIENT COMPARISON", "=" * 60, sep="\n")
print(f"Estimated beta1: {model_biased.params[1]:.4f}")
if true_beta1:
    print(f"True beta1:      {true_beta1:.4f}")
    print(f"Bias:            {model_biased.params[1] - true_beta1:.4f}")
else:
    print("True beta1: N/A (nonlinear)")
    print("Bias: N/A")

## Unbiased Regression (Including Z)

When we include Z in the regression, we recover the true coefficients.

In [None]:
# Correctly specified regression: intercept + X + Z.
XZ = np.column_stack([X, Z])
XZ_with_const = sm.add_constant(XZ)
model_unbiased = sm.OLS(Y, XZ_with_const).fit()

print("=" * 60, "UNBIASED MODEL: Y ~ X + Z", "=" * 60, sep="\n")
print(model_unbiased.summary())

print("", "=" * 60, "COEFFICIENT COMPARISON", "=" * 60, sep="\n")

# params is ordered [const, X, Z].
beta1_hat = model_unbiased.params[1]
beta2_hat = model_unbiased.params[2]

# beta1 has a reference value only when f is linear; beta2 always does.
beta1_truth_text = true_beta1 if true_beta1 else "N/A - nonlinear"
print(f"Estimated beta1 (X): {beta1_hat:.4f} (True: {beta1_truth_text})")
print(f"Estimated beta2 (Z): {beta2_hat:.4f} (True: {cfg['beta2']})")
if true_beta1:
    print(f"Beta1 bias: {beta1_hat - true_beta1:.4f}")
print(f"Beta2 bias: {beta2_hat - cfg['beta2']:.4f}")

## Theoretical Bias Analysis

The OVB formula shows that the bias in the estimated $\beta_1$ when omitting $Z$ is:

$$\text{Bias}(\hat{\beta}_1) = \beta_2 \cdot \frac{\text{Cov}(X, Z)}{\text{Var}(X)} = \beta_2 \cdot \rho \cdot \frac{\sigma_Z}{\sigma_X}$$

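Here $Z$ has unit variance ($\sigma_Z = 1$), but $X$ is rescaled to the training range, so $\sigma_X = (x_{\max} - x_{\min})/4$ (i.e. $2.5$ for the default range $(0, 10)$). The bias therefore simplifies to:
$$\text{Bias}(\hat{\beta}_1) \approx \frac{\beta_2 \cdot \rho}{\sigma_X}$$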

In [None]:
# Compare the bias predicted by the OVB formula with the bias actually observed.
#
# OVB formula: Bias = beta2 * Cov(X, Z) / Var(X) = beta2 * rho * sigma_Z / sigma_X.
# Z is drawn as N(0, 1), so sigma_Z = 1, but generate_toy_regression_ovb
# rescales X by (max - min) / 4 of train_range, so sigma_X is that factor
# rather than 1 and must appear in the population-level approximation.
x_lo, x_hi = cfg['train_range']
sigma_x = (x_hi - x_lo) / 4
sigma_z = 1.0
theoretical_bias = cfg['beta2'] * cfg['rho'] * sigma_z / sigma_x  # Population-level formula

# `is not None` rather than truthiness, so a true slope of 0.0 would still count.
observed_bias = model_biased.params[1] - true_beta1 if true_beta1 is not None else np.nan

# Sample-level formula. np.cov defaults to ddof=1, so np.var must use ddof=1
# too; otherwise the ratio is off by a factor of n / (n - 1).
cov_xz = np.cov(X, Z)[0, 1]
var_x = np.var(X, ddof=1)
precise_theoretical_bias = cfg['beta2'] * cov_xz / var_x

print("=" * 60)
print("BIAS ANALYSIS")
print("=" * 60)
print(
    "Theoretical bias (approx): beta2 * rho * sigma_Z / sigma_X = "
    f"{cfg['beta2']} * {cfg['rho']} * {sigma_z} / {sigma_x} = {theoretical_bias:.4f}"
)
print(f"Theoretical bias (precise): beta2 * Cov(X,Z)/Var(X) = {precise_theoretical_bias:.4f}")
if true_beta1 is not None:
    print(f"Observed bias: {observed_bias:.4f}")
    print(f"Difference: {abs(observed_bias - precise_theoretical_bias):.4f}")
else:
    print("Observed bias: N/A (nonlinear function)")

## Experiment: Varying Correlation (rho)

Let's see how the bias changes as we vary the correlation between X and Z.

In [None]:
# Experiment: refit the misspecified model (Y ~ X) for several X-Z correlations
# and record the estimated slope, its bias and its confidence interval.
rho_values = [0.0, 0.2, 0.4, 0.6, 0.8, 0.95]
results = []

# True beta1 depends on func_type: linear = 0.7, 'sin' has no single slope.
true_beta1 = 0.7 if cfg['func_type'] == 'linear' else None

# The generator rescales X so that its standard deviation is
# (max - min) / 4 of train_range, while Z keeps unit variance. The OVB formula
# beta2 * rho * sigma_Z / sigma_X therefore needs this divisor.
sigma_x = (cfg['train_range'][1] - cfg['train_range'][0]) / 4

for rho in rho_values:
    # Same configuration and seed as the main experiment; only rho changes.
    X_exp, Z_exp, Y_exp, _, _ = generate_toy_regression_ovb(**{**cfg, 'rho': rho})

    # Fit biased model (Z omitted)
    X_exp_const = sm.add_constant(X_exp)
    model_exp = sm.OLS(Y_exp, X_exp_const).fit()

    # Row 1 of params / bse / conf_int belongs to X (row 0 is the constant).
    beta1_hat = model_exp.params[1]
    ci_lower, ci_upper = model_exp.conf_int()[1]

    results.append({
        'rho': rho,
        'estimated_beta1': beta1_hat,
        'true_beta1': true_beta1,
        # Explicit None check so a true slope of 0.0 would not be treated as missing.
        'bias': beta1_hat - true_beta1 if true_beta1 is not None else np.nan,
        'theoretical_bias': cfg['beta2'] * rho / sigma_x,
        'std_err': model_exp.bse[1],
        'ci_lower': ci_lower,
        'ci_upper': ci_upper,
    })

results_df = pd.DataFrame(results)
print(results_df.to_string(index=False))

In [None]:
# Two side-by-side views of the rho sweep: the estimate itself, and its bias.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax1, ax2 = axes
rho_axis = results_df['rho']

# Left panel: slope estimates with +/- 1.96 standard-error bars.
ax1.errorbar(
    rho_axis,
    results_df['estimated_beta1'],
    yerr=1.96 * results_df['std_err'],
    fmt='o-',
    capsize=5,
    label='Estimated beta1 (95% CI)',
    color='blue',
)
if true_beta1:
    ax1.axhline(y=true_beta1, color='green', linestyle='--', linewidth=2, label=f"True beta1 = {true_beta1}")
ax1.set(
    xlabel='rho (X-Z Correlation)',
    ylabel='Estimated beta1',
    title='Estimated Coefficient vs. Confounding Strength',
)
ax1.legend()
ax1.grid(True, alpha=0.3)

# Right panel: observed bias (only when a true slope exists) next to the
# theoretical curve, with a zero reference line.
if true_beta1:
    ax2.plot(rho_axis, results_df['bias'], 'o-', label='Observed Bias', color='red', linewidth=2)
ax2.plot(
    rho_axis,
    results_df['theoretical_bias'],
    's--',
    label='Theoretical Bias (beta2 * rho)',
    color='orange',
    linewidth=2,
)
ax2.axhline(y=0, color='gray', linestyle='-', linewidth=1)
ax2.set(
    xlabel='rho (X-Z Correlation)',
    ylabel='Bias in beta1 Estimate',
    title='OVB: Bias Grows with Correlation',
)
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Summary: Biased vs Unbiased Model Comparison

In [None]:
# Side-by-side summary of the misspecified (Y ~ X) and full (Y ~ X + Z) fits.
# Only meaningful when a true slope exists, i.e. for the linear f.
if true_beta1 is not None:
    # Index 1 is the X coefficient in both models (index 0 is the constant).
    beta1_biased = model_biased.params[1]
    beta1_unbiased = model_unbiased.params[1]
    bias_biased = beta1_biased - true_beta1
    bias_unbiased = beta1_unbiased - true_beta1

    # Compute each interval once instead of once per bound.
    ci_biased = model_biased.conf_int()[1]
    ci_unbiased = model_unbiased.conf_int()[1]

    summary_data = {
        'Model': ['Biased (Y ~ X)', 'Unbiased (Y ~ X + Z)'],
        'beta1 Estimate': [beta1_biased, beta1_unbiased],
        'beta1 True': [true_beta1, true_beta1],
        'beta1 Bias': [bias_biased, bias_unbiased],
        'beta1 Std Err': [model_biased.bse[1], model_unbiased.bse[1]],
        'beta1 95% CI': [
            f"[{ci_biased[0]:.3f}, {ci_biased[1]:.3f}]",
            f"[{ci_unbiased[0]:.3f}, {ci_unbiased[1]:.3f}]"
        ],
        'R-squared': [model_biased.rsquared, model_unbiased.rsquared],
    }

    summary_df = pd.DataFrame(summary_data)
    print("=" * 80)
    print("SUMMARY: BIASED vs UNBIASED REGRESSION")
    print("=" * 80)
    print(f"Configuration: n={cfg['n_train']}, func_type={cfg['func_type']}, beta2={cfg['beta2']}, rho={cfg['rho']}")
    print()
    print(summary_df.to_string(index=False))
    print()
    print("KEY INSIGHT:")
    # Derive the narrative from the fitted numbers so it stays true when
    # rho, beta2 or the seed change (e.g. negative beta2 flips the sign).
    direction = "overestimates" if bias_biased >= 0 else "underestimates"
    print(f"  - The biased model {direction} beta1 by {abs(bias_biased):.3f}")
    print(f"  - This is because it attributes Z's effect to X (since they're correlated)")
    if ci_biased[0] <= true_beta1 <= ci_biased[1]:
        print(f"  - The confidence interval for the biased estimate still contains the true value (the bias is within sampling noise)")
    else:
        print(f"  - The confidence interval for the biased estimate does NOT contain the true value!")
else:
    print("Summary not available for nonlinear function types.")

---

# Part 2: Uncertainty Estimation with MC Dropout

Now we use MC Dropout to estimate aleatoric and epistemic uncertainty when training on X only (omitting Z). We analyze how these uncertainties change as we vary:
1. **rho** (correlation between X and Z) - with fixed beta2
2. **beta2** (effect of omitted Z) - with fixed rho

Both variance-based and entropy-based (information-theoretic, IT) decompositions are computed.

In [None]:
# Additional imports for uncertainty experiments
import torch
import sys
from pathlib import Path

# Locate the project root: one level up when the notebook is launched from the
# 'Experiments' folder, otherwise the working directory itself. It goes to the
# front of sys.path so project packages can be imported.
project_root = Path.cwd()
if project_root.name == 'Experiments':
    project_root = project_root.parent
sys.path.insert(0, str(project_root))

# Output folder for this notebook's artefacts; created on demand.
results_dir = project_root.joinpath("results", "ovb")
results_dir.mkdir(parents=True, exist_ok=True)
print(f"Results will be saved to: {results_dir}")

# Import OVB experiment functions
from utils.ovb_experiments import (
    # MC Dropout
    run_mc_dropout_ovb_rho_experiment,
    run_mc_dropout_ovb_beta2_experiment,
    # Deep Ensemble
    run_deep_ensemble_ovb_rho_experiment,
    run_deep_ensemble_ovb_beta2_experiment,
    # BNN
    run_bnn_ovb_rho_experiment,
    run_bnn_ovb_beta2_experiment,
    # BAMLSS
    run_bamlss_ovb_rho_experiment,
    run_bamlss_ovb_beta2_experiment,
    # Plotting
    plot_ovb_uncertainty_comparison,
    plot_ovb_heatmap_comparison,
    plot_ovb_marginal_comparison,
    plot_ovb_z_slices
)

# Obtain the compute device from the project helper. utils.device is not shown
# in this notebook; presumably it picks between GPU and CPU — confirm there.
from utils.device import get_device
device = get_device()

In [None]:
# Settings for the MC Dropout runs: shared data options first, then the
# method-specific training and sampling knobs.
mc_cfg = dict(
    n_train=1000,
    train_range=(-5, 15),
    grid_points=600,
    func_type=["linear", "sin"],
    noise_type=["homoscedastic", "heteroscedastic"],
    seed=42,
    # MC Dropout specific
    p=0.25,           # Dropout probability
    beta=0.5,         # Beta-NLL beta parameter
    epochs=500,
    lr=1e-3,
    batch_size=32,
    mc_samples=100,
    entropy_method="analytical",
)

print("MC Dropout Configuration:")
print("\n".join(f"  {name}: {value}" for name, value in mc_cfg.items()))

## Experiment 1: Varying rho (X-Z Correlation)

Fix beta2 and vary rho to see how increasing correlation between X and Z affects uncertainty estimates.

In [5]:
# MC Dropout sweep over the X-Z correlation with the effect of Z held fixed.
rho_values = [0.0, 0.5, 1]
fixed_beta2 = 1.0

# Every entry of mc_cfg maps one-to-one onto a keyword of the runner, so the
# dict is forwarded wholesale.
rho_results_df, rho_all_results = run_mc_dropout_ovb_rho_experiment(
    rho_values=rho_values,
    beta2=fixed_beta2,
    **mc_cfg,
    save_plots=True,
    results_dir=results_dir,
)

KeyboardInterrupt: 

In [None]:
# Tabulate the rho sweep, then draw the bar-chart comparison.
print("=" * 80, "RHO EXPERIMENT RESULTS (Varying X-Z Correlation)", "=" * 80, sep="\n")
print(f"\nFixed beta2 = {fixed_beta2}\n")
print(rho_results_df.to_string(index=False))

plot_ovb_uncertainty_comparison(rho_results_df, vary_param='rho')

### Detailed Heatmap and Marginal Visualizations (Rho Experiment)

Compare full model (with Z) vs omitted model (X only) using 2D heatmaps and marginal distributions.

In [None]:
# Per-rho diagnostics: heatmaps, then marginals, then Z-slices, each drawn for
# the variance decomposition first and the entropy decomposition second.
for rho_val in rho_values:
    rule = '=' * 60
    print(f"\n{rule}\nVisualizations for rho = {rho_val}\n{rule}")

    for plot_fn, percentiles in (
        (plot_ovb_heatmap_comparison, None),
        (plot_ovb_marginal_comparison, None),
        (plot_ovb_z_slices, (10, 50, 90)),
    ):
        for decomposition in ('variance', 'entropy'):
            # Only the Z-slice plot takes percentiles; give it a fresh list per call.
            extra = {} if percentiles is None else {'z_percentiles': list(percentiles)}
            plot_fn(
                rho_all_results,
                param_value=rho_val,
                decomposition=decomposition,
                results_dir=results_dir,
                **extra,
            )

## Experiment 2: Varying beta2 (Effect of Omitted Z)

Fix rho and vary beta2 to see how increasing the effect of the omitted variable Z affects uncertainty estimates.

In [None]:
# MC Dropout sweep over the effect of the omitted variable with rho held fixed.
beta2_values = [0.0, 0.5, 1.0, 2.0, 3.0]
fixed_rho = 0.7

# Every entry of mc_cfg maps one-to-one onto a keyword of the runner, so the
# dict is forwarded wholesale.
beta2_results_df, beta2_all_results = run_mc_dropout_ovb_beta2_experiment(
    beta2_values=beta2_values,
    rho=fixed_rho,
    **mc_cfg,
    save_plots=True,
    results_dir=results_dir,
)

In [None]:
# Tabulate the beta2 sweep, then draw the bar-chart comparison.
print("=" * 80, "BETA2 EXPERIMENT RESULTS (Varying Effect of Omitted Z)", "=" * 80, sep="\n")
print(f"\nFixed rho = {fixed_rho}\n")
print(beta2_results_df.to_string(index=False))

plot_ovb_uncertainty_comparison(beta2_results_df, vary_param='beta2')

### Detailed Heatmap and Marginal Visualizations (Beta2 Experiment)

Compare full model (with Z) vs omitted model (X only) using 2D heatmaps and marginal distributions.

In [None]:
# Per-beta2 diagnostics: heatmaps, then marginals, then Z-slices, each drawn
# for the variance decomposition first and the entropy decomposition second.
for beta2_val in beta2_values:
    rule = '=' * 60
    print(f"\n{rule}\nVisualizations for beta2 = {beta2_val}\n{rule}")

    for plot_fn, percentiles in (
        (plot_ovb_heatmap_comparison, None),
        (plot_ovb_marginal_comparison, None),
        (plot_ovb_z_slices, (10, 50, 90)),
    ):
        for decomposition in ('variance', 'entropy'):
            # Only the Z-slice plot takes percentiles; give it a fresh list per call.
            extra = {} if percentiles is None else {'z_percentiles': list(percentiles)}
            plot_fn(
                beta2_all_results,
                param_value=beta2_val,
                decomposition=decomposition,
                results_dir=results_dir,
                **extra,
            )

## Summary: OVB Uncertainty Experiments

### Key Observations

**Varying rho (X-Z Correlation):**
- As rho increases, X and Z become more correlated
- The model trained on X only cannot distinguish X's effect from Z's confounded effect
- Epistemic uncertainty may change as the model becomes more/less confident

**Varying beta2 (Effect of Omitted Z):**
- As beta2 increases, the omitted variable Z has a larger effect on Y
- This introduces more unexplained variance from the model's perspective
- Both aleatoric and epistemic uncertainty estimates may be affected

In [None]:
# Combined summary: min-max ranges of aleatoric (AU) and epistemic (EU)
# uncertainty for both sweeps, followed by a CSV export of each table.
def _print_uncertainty_ranges(frame):
    """Print the AU/EU min-max range for the variance and entropy columns."""
    for heading, suffix in (
        ("Variance Decomposition:", "var"),
        ("Entropy Decomposition:", "entropy"),
    ):
        print(heading)
        for tag, source in (("AU", "ale"), ("EU", "epi")):
            column = frame[f"mean_{source}_{suffix}"]
            print(f"  {tag} range: {column.min():.4f} - {column.max():.4f}")


print("=" * 80, "COMBINED SUMMARY: OVB UNCERTAINTY EXPERIMENTS", "=" * 80, sep="\n")

print(f"\n--- Rho Experiment (fixed beta2={fixed_beta2}) ---")
_print_uncertainty_ranges(rho_results_df)

print(f"\n--- Beta2 Experiment (fixed rho={fixed_rho}) ---")
_print_uncertainty_ranges(beta2_results_df)

# Persist both result tables next to the plots.
for frame, filename in (
    (rho_results_df, "ovb_rho_experiment_results.csv"),
    (beta2_results_df, "ovb_beta2_experiment_results.csv"),
):
    frame.to_csv(results_dir / filename, index=False)
print(f"\nResults saved to {results_dir}")

---

# Part 3: Deep Ensemble OVB Experiments

Deep Ensembles provide uncertainty estimates through the disagreement between ensemble members:
$K$ networks are trained independently and their predictions are combined.

In [None]:
# Settings for the Deep Ensemble runs: shared data options first, then the
# ensemble-specific knobs.
de_cfg = dict(
    n_train=1000,
    train_range=(-5, 15),
    grid_points=600,
    func_type=["linear", "sin"],
    noise_type=["homoscedastic", "heteroscedastic"],
    seed=42,
    # Deep Ensemble specific
    K=10,               # Number of ensemble members
    epochs=500,
    batch_size=32,
    entropy_method="analytical",
)

print("Deep Ensemble Configuration:")
print("\n".join(f"  {name}: {value}" for name, value in de_cfg.items()))

## Deep Ensemble: Varying rho (X-Z Correlation)

In [None]:
# Deep Ensemble sweep over the X-Z correlation with the effect of Z held fixed.
de_rho_values = [0.0, 0.5, 1.0]
de_fixed_beta2 = 1.0

# Every entry of de_cfg maps one-to-one onto a keyword of the runner, so the
# dict is forwarded wholesale.
de_rho_results_df, de_rho_all_results = run_deep_ensemble_ovb_rho_experiment(
    rho_values=de_rho_values,
    beta2=de_fixed_beta2,
    **de_cfg,
    save_plots=True,
    results_dir=results_dir / "deep_ensemble",
)

## Deep Ensemble: Varying beta2 (Effect of Omitted Z)

In [None]:
# Deep Ensemble sweep over the effect of the omitted variable with rho held fixed.
de_beta2_values = [0.0, 0.5, 1.0, 2.0, 3.0]
de_fixed_rho = 0.7

# Every entry of de_cfg maps one-to-one onto a keyword of the runner, so the
# dict is forwarded wholesale.
de_beta2_results_df, de_beta2_all_results = run_deep_ensemble_ovb_beta2_experiment(
    beta2_values=de_beta2_values,
    rho=de_fixed_rho,
    **de_cfg,
    save_plots=True,
    results_dir=results_dir / "deep_ensemble",
)

---

# Part 4: BNN (Bayesian Neural Network) OVB Experiments

BNNs use MCMC sampling (NUTS) to approximate the posterior distribution over weights.
This provides principled uncertainty estimates from the weight posterior.

In [None]:
# Settings for the BNN runs: shared data options first, then the sampler and
# architecture knobs.
bnn_cfg = dict(
    n_train=500,        # Smaller for BNN (slower)
    train_range=(-5, 15),
    grid_points=300,    # Smaller grid for speed
    func_type=["linear", "sin"],
    noise_type=["homoscedastic", "heteroscedastic"],
    seed=42,
    # BNN specific
    hidden_width=16,
    weight_scale=1.0,
    warmup=100,         # Reduced for speed
    samples=100,        # Reduced for speed
    entropy_method="analytical",
)

print("BNN Configuration:")
print("\n".join(f"  {name}: {value}" for name, value in bnn_cfg.items()))

## BNN: Varying rho (X-Z Correlation)

In [None]:
# BNN sweep over the X-Z correlation with the effect of Z held fixed.
bnn_rho_values = [0.0, 0.5, 1.0]
bnn_fixed_beta2 = 1.0

# Every entry of bnn_cfg maps one-to-one onto a keyword of the runner, so the
# dict is forwarded wholesale.
bnn_rho_results_df, bnn_rho_all_results = run_bnn_ovb_rho_experiment(
    rho_values=bnn_rho_values,
    beta2=bnn_fixed_beta2,
    **bnn_cfg,
    save_plots=True,
    results_dir=results_dir / "bnn",
)

## BNN: Varying beta2 (Effect of Omitted Z)

In [None]:
# BNN sweep over the effect of the omitted variable with rho held fixed.
bnn_beta2_values = [0.0, 0.5, 1.0, 2.0, 3.0]
bnn_fixed_rho = 0.7

# Every entry of bnn_cfg maps one-to-one onto a keyword of the runner, so the
# dict is forwarded wholesale.
bnn_beta2_results_df, bnn_beta2_all_results = run_bnn_ovb_beta2_experiment(
    beta2_values=bnn_beta2_values,
    rho=bnn_fixed_rho,
    **bnn_cfg,
    save_plots=True,
    results_dir=results_dir / "bnn",
)

---

# Part 5: BAMLSS (Bayesian GAMLSS) OVB Experiments

BAMLSS uses R's bamlss package via rpy2 for Bayesian distributional regression.
Requires R and the bamlss package to be installed.

In [None]:
# Settings for the BAMLSS runs: shared data options first, then the MCMC
# knobs.
bamlss_cfg = dict(
    n_train=500,        # Smaller for BAMLSS (slowest)
    train_range=(-5, 15),
    grid_points=200,    # Smaller grid for speed
    func_type=["linear", "sin"],
    noise_type=["homoscedastic", "heteroscedastic"],
    seed=42,
    # BAMLSS specific
    n_iter=6000,        # Reduced for speed
    burnin=1000,
    thin=5,
    nsamples=500,
    entropy_method="analytical",
)

print("BAMLSS Configuration:")
print("\n".join(f"  {name}: {value}" for name, value in bamlss_cfg.items()))

## BAMLSS: Varying rho (X-Z Correlation)

In [None]:
# BAMLSS sweep over the X-Z correlation with the effect of Z held fixed.
bamlss_rho_values = [0.0, 0.5, 1.0]
bamlss_fixed_beta2 = 1.0

# Every entry of bamlss_cfg maps one-to-one onto a keyword of the runner, so
# the dict is forwarded wholesale.
bamlss_rho_results_df, bamlss_rho_all_results = run_bamlss_ovb_rho_experiment(
    rho_values=bamlss_rho_values,
    beta2=bamlss_fixed_beta2,
    **bamlss_cfg,
    save_plots=True,
    results_dir=results_dir / "bamlss",
)

## BAMLSS: Varying beta2 (Effect of Omitted Z)

In [None]:
# BAMLSS sweep over the effect of the omitted variable with rho held fixed.
bamlss_beta2_values = [0.0, 0.5, 1.0, 2.0, 3.0]
bamlss_fixed_rho = 0.7

# Every entry of bamlss_cfg maps one-to-one onto a keyword of the runner, so
# the dict is forwarded wholesale.
bamlss_beta2_results_df, bamlss_beta2_all_results = run_bamlss_ovb_beta2_experiment(
    beta2_values=bamlss_beta2_values,
    rho=bamlss_fixed_rho,
    **bamlss_cfg,
    save_plots=True,
    results_dir=results_dir / "bamlss",
)