# Classification OVB (Omitted Variable Bias) Experiments

This notebook implements Omitted Variable Bias experiments for classification, mirroring the regression OVB setup.

## Data Generating Process

**True Model:**
$$Z \sim N(0, 1)$$
$$X = \rho Z + \sqrt{1 - \rho^2} \cdot \nu, \quad \nu \sim N(0, 1)$$
$$\text{latent} = f(X) + \beta_2 Z$$
$$P(Y=1 \mid X, Z) = \sigma(\text{latent})$$
$$Y \sim \text{Bernoulli}\big(P(Y=1 \mid X, Z)\big)$$

**Parameters:**
- $\rho$: Correlation between X and Z (0 to 1)
- $\beta_2$: Effect of omitted variable Z on classification boundary
- $f(X)$: 'linear' or 'sin' (nonlinear)

**Expected Behavior:**
- High $\rho$, high $\beta_2$: Strong OVB; AU is inflated in the omitted model
- High $\rho$, low $\beta_2$: Weak OVB; Z has little influence on the outcome
- Low $\rho$, high $\beta_2$: Moderate OVB; Z cannot be recovered from X

In [None]:
import sys
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Make the project root importable: step up one level when the kernel was
# started inside Experiments/, otherwise use the working directory as-is.
_cwd = Path.cwd()
project_root = _cwd if _cwd.name != "Experiments" else _cwd.parent
sys.path.insert(0, str(project_root))

from utils.ovb_classification_experiments import (
    # Data generation
    generate_ovb_classification_data,
    generate_ovb_2d_grid,
    # MC Dropout IT
    run_mc_dropout_it_ovb_rho_experiment,
    run_mc_dropout_it_ovb_beta2_experiment,
    # MC Dropout GL
    run_mc_dropout_gl_ovb_rho_experiment,
    run_mc_dropout_gl_ovb_beta2_experiment,
    # Deep Ensemble IT
    run_deep_ensemble_it_ovb_rho_experiment,
    run_deep_ensemble_it_ovb_beta2_experiment,
    # Deep Ensemble GL
    run_deep_ensemble_gl_ovb_rho_experiment,
    run_deep_ensemble_gl_ovb_beta2_experiment,
    # BNN IT
    run_bnn_it_ovb_rho_experiment,
    run_bnn_it_ovb_beta2_experiment,
    # BNN GL
    run_bnn_gl_ovb_rho_experiment,
    run_bnn_gl_ovb_beta2_experiment,
    # Plotting
    plot_ovb_classification_heatmap,
    plot_ovb_classification_marginals,
    plot_ovb_classification_z_slices,
    plot_ovb_classification_summary,
)

# Seed NumPy's legacy global RNG so that NumPy-based randomness drawn in this
# kernel is repeatable across Restart & Run All.
# NOTE(review): the experiment runners are also given an explicit `seed`
# argument; whether they rely on this global seed as well is not visible here.
np.random.seed(42)

## Configuration

In [None]:
# Sweep grids, plus the value held constant while the other parameter varies
rho_values = [0.0, 0.3, 0.5, 0.7, 0.9]
beta2_values = [0.0, 0.5, 1.0, 2.0, 3.0]
fixed_beta2 = 1.0  # used by the rho sweeps
fixed_rho = 0.7    # used by the beta2 sweeps

# Settings shared by every model family in this notebook
base_cfg = dict(
    n_train=1000,
    train_range=(-3.0, 3.0),
    grid_points=50,
    func_type=["linear", "sin"],  # a list requests one configuration per entry
    seed=42,
)

# Output location for saved figures (created on demand, relative to the CWD)
results_dir = Path("results/ovb_classification")
results_dir.mkdir(parents=True, exist_ok=True)

## Visualize Data Generating Process

In [None]:
# Visualize the DGP for a few example rho values: one figure column per rho,
# with the sampled training data on the top row and the true probability
# surface on the bottom row.
rho_examples = [0.0, 0.5, 0.9]
fig, axes = plt.subplots(2, 3, figsize=(15, 10))

for col, rho in enumerate(rho_examples):
    ax_data, ax_prob = axes[:, col]

    X, Z, Y, true_probs, x_grid, z_grid = generate_ovb_classification_data(
        n_train=500, rho=rho, beta2=1.0, func_type='linear', seed=42
    )

    # Top row: training points in the (X, Z) plane, colored by class label
    scatter = ax_data.scatter(X.flatten(), Z, c=Y, cmap='RdBu', alpha=0.6, s=10)
    ax_data.set(
        xlabel='X (observed)',
        ylabel='Z (omitted)',
        title=f'Training Data (rho={rho})',
    )
    plt.colorbar(scatter, ax=ax_data, label='Class')

    # Bottom row: true probabilities returned by generate_ovb_2d_grid for the
    # x_grid / z_grid pair that came back with the training data
    X_grid_2d, Z_grid_2d, true_probs_2d = generate_ovb_2d_grid(
        x_grid, z_grid, func_type='linear', beta2=1.0
    )
    # NOTE(review): the reshape assumes the flat output is ordered with Z as
    # the slow axis — confirm against generate_ovb_2d_grid.
    probs_2d = true_probs_2d.reshape(len(z_grid), len(x_grid))
    im = ax_prob.imshow(
        probs_2d,
        extent=[x_grid.min(), x_grid.max(), z_grid.min(), z_grid.max()],
        origin='lower',
        aspect='auto',
        cmap='RdBu',
        vmin=0,
        vmax=1,
    )
    ax_prob.set(
        xlabel='X (observed)',
        ylabel='Z (omitted)',
        title=f'True P(Y=1|X,Z) (rho={rho})',
    )
    plt.colorbar(im, ax=ax_prob, label='P(Y=1)')

plt.suptitle('OVB Classification DGP: Effect of rho on X-Z correlation', fontsize=14)
plt.tight_layout()
plt.savefig(results_dir / 'dgp_visualization.png', dpi=150, bbox_inches='tight')
plt.show()

---

# MC Dropout IT Experiments

In [None]:
# MC Dropout IT settings: the shared base configuration plus training and
# MC-sampling hyperparameters
mc_it_cfg = dict(
    base_cfg,
    epochs=300,
    lr=1e-3,
    batch_size=32,
    dropout_p=0.25,
    mc_samples=100,
)

print("MC Dropout IT Configuration:")
print(*(f"  {name}: {value}" for name, value in mc_it_cfg.items()), sep="\n")

In [None]:
# MC Dropout IT: sweep rho with beta2 held at fixed_beta2.
# mc_it_cfg contains exactly the remaining keyword arguments of this call
# (data settings plus model hyperparameters), so it is unpacked directly.
mc_it_rho_df, mc_it_rho_results = run_mc_dropout_it_ovb_rho_experiment(
    rho_values=rho_values,
    beta2=fixed_beta2,
    **mc_it_cfg,
    save_plots=True,
    results_dir=results_dir,
)

print("\nMC Dropout IT - Rho Experiment Summary:")
display(mc_it_rho_df)

In [None]:
# MC Dropout IT: sweep beta2 with rho held at fixed_rho.
# mc_it_cfg contains exactly the remaining keyword arguments of this call
# (data settings plus model hyperparameters), so it is unpacked directly.
mc_it_beta2_df, mc_it_beta2_results = run_mc_dropout_it_ovb_beta2_experiment(
    beta2_values=beta2_values,
    rho=fixed_rho,
    **mc_it_cfg,
    save_plots=True,
    results_dir=results_dir,
)

print("\nMC Dropout IT - Beta2 Experiment Summary:")
display(mc_it_beta2_df)

---

# MC Dropout GL Experiments

In [None]:
# MC Dropout GL settings: the shared base configuration plus training,
# MC-sampling and GL-sampling hyperparameters
mc_gl_cfg = dict(
    base_cfg,
    epochs=300,
    lr=1e-3,
    batch_size=32,
    dropout_p=0.25,
    mc_samples=100,
    gl_samples=100,
)

print("MC Dropout GL Configuration:")
print(*(f"  {name}: {value}" for name, value in mc_gl_cfg.items()), sep="\n")

In [None]:
# MC Dropout GL: sweep rho with beta2 held at fixed_beta2.
# mc_gl_cfg contains exactly the remaining keyword arguments of this call
# (data settings plus model hyperparameters), so it is unpacked directly.
mc_gl_rho_df, mc_gl_rho_results = run_mc_dropout_gl_ovb_rho_experiment(
    rho_values=rho_values,
    beta2=fixed_beta2,
    **mc_gl_cfg,
    save_plots=True,
    results_dir=results_dir,
)

print("\nMC Dropout GL - Rho Experiment Summary:")
display(mc_gl_rho_df)

In [None]:
# MC Dropout GL: sweep beta2 with rho held at fixed_rho.
# mc_gl_cfg contains exactly the remaining keyword arguments of this call
# (data settings plus model hyperparameters), so it is unpacked directly.
mc_gl_beta2_df, mc_gl_beta2_results = run_mc_dropout_gl_ovb_beta2_experiment(
    beta2_values=beta2_values,
    rho=fixed_rho,
    **mc_gl_cfg,
    save_plots=True,
    results_dir=results_dir,
)

print("\nMC Dropout GL - Beta2 Experiment Summary:")
display(mc_gl_beta2_df)

---

# Deep Ensemble IT Experiments

In [None]:
# Deep Ensemble IT settings: the shared base configuration plus training
# hyperparameters and the ensemble size
de_it_cfg = dict(
    base_cfg,
    epochs=300,
    lr=1e-3,
    batch_size=32,
    K=10,  # number of ensemble members
)

print("Deep Ensemble IT Configuration:")
print(*(f"  {name}: {value}" for name, value in de_it_cfg.items()), sep="\n")

In [None]:
# Deep Ensemble IT: sweep rho with beta2 held at fixed_beta2.
# de_it_cfg contains exactly the remaining keyword arguments of this call
# (data settings plus model hyperparameters), so it is unpacked directly.
de_it_rho_df, de_it_rho_results = run_deep_ensemble_it_ovb_rho_experiment(
    rho_values=rho_values,
    beta2=fixed_beta2,
    **de_it_cfg,
    save_plots=True,
    results_dir=results_dir,
)

print("\nDeep Ensemble IT - Rho Experiment Summary:")
display(de_it_rho_df)

In [None]:
# Deep Ensemble IT: sweep beta2 with rho held at fixed_rho.
# de_it_cfg contains exactly the remaining keyword arguments of this call
# (data settings plus model hyperparameters), so it is unpacked directly.
de_it_beta2_df, de_it_beta2_results = run_deep_ensemble_it_ovb_beta2_experiment(
    beta2_values=beta2_values,
    rho=fixed_rho,
    **de_it_cfg,
    save_plots=True,
    results_dir=results_dir,
)

print("\nDeep Ensemble IT - Beta2 Experiment Summary:")
display(de_it_beta2_df)

---

# Deep Ensemble GL Experiments

In [None]:
# Deep Ensemble GL settings: the shared base configuration plus training
# hyperparameters, the ensemble size and the GL sample count
de_gl_cfg = dict(
    base_cfg,
    epochs=300,
    lr=1e-3,
    batch_size=32,
    K=10,
    gl_samples=100,
)

print("Deep Ensemble GL Configuration:")
print(*(f"  {name}: {value}" for name, value in de_gl_cfg.items()), sep="\n")

In [None]:
# Deep Ensemble GL: sweep rho with beta2 held at fixed_beta2.
# de_gl_cfg contains exactly the remaining keyword arguments of this call
# (data settings plus model hyperparameters), so it is unpacked directly.
de_gl_rho_df, de_gl_rho_results = run_deep_ensemble_gl_ovb_rho_experiment(
    rho_values=rho_values,
    beta2=fixed_beta2,
    **de_gl_cfg,
    save_plots=True,
    results_dir=results_dir,
)

print("\nDeep Ensemble GL - Rho Experiment Summary:")
display(de_gl_rho_df)

In [None]:
# Deep Ensemble GL: sweep beta2 with rho held at fixed_rho.
# de_gl_cfg contains exactly the remaining keyword arguments of this call
# (data settings plus model hyperparameters), so it is unpacked directly.
de_gl_beta2_df, de_gl_beta2_results = run_deep_ensemble_gl_ovb_beta2_experiment(
    beta2_values=beta2_values,
    rho=fixed_rho,
    **de_gl_cfg,
    save_plots=True,
    results_dir=results_dir,
)

print("\nDeep Ensemble GL - Beta2 Experiment Summary:")
display(de_gl_beta2_df)

---

# BNN IT Experiments

In [None]:
# BNN IT settings: the shared base configuration plus sampler and network
# hyperparameters
bnn_it_cfg = dict(
    base_cfg,
    num_samples=200,
    warmup_steps=100,
    hidden_width=32,
    weight_scale=1.0,
)

print("BNN IT Configuration:")
print(*(f"  {name}: {value}" for name, value in bnn_it_cfg.items()), sep="\n")

In [None]:
# BNN IT: sweep rho with beta2 held at fixed_beta2.
# bnn_it_cfg contains exactly the remaining keyword arguments of this call
# (data settings plus model hyperparameters), so it is unpacked directly.
bnn_it_rho_df, bnn_it_rho_results = run_bnn_it_ovb_rho_experiment(
    rho_values=rho_values,
    beta2=fixed_beta2,
    **bnn_it_cfg,
    save_plots=True,
    results_dir=results_dir,
)

print("\nBNN IT - Rho Experiment Summary:")
display(bnn_it_rho_df)

In [None]:
# BNN IT: sweep beta2 with rho held at fixed_rho.
# bnn_it_cfg contains exactly the remaining keyword arguments of this call
# (data settings plus model hyperparameters), so it is unpacked directly.
bnn_it_beta2_df, bnn_it_beta2_results = run_bnn_it_ovb_beta2_experiment(
    beta2_values=beta2_values,
    rho=fixed_rho,
    **bnn_it_cfg,
    save_plots=True,
    results_dir=results_dir,
)

print("\nBNN IT - Beta2 Experiment Summary:")
display(bnn_it_beta2_df)

---

# BNN GL Experiments

In [None]:
# BNN GL settings: the shared base configuration plus sampler and network
# hyperparameters and the GL sample count
bnn_gl_cfg = dict(
    base_cfg,
    num_samples=200,
    warmup_steps=100,
    hidden_width=32,
    weight_scale=1.0,
    gl_samples=100,
)

print("BNN GL Configuration:")
print(*(f"  {name}: {value}" for name, value in bnn_gl_cfg.items()), sep="\n")

In [None]:
# BNN GL: sweep rho with beta2 held at fixed_beta2.
# bnn_gl_cfg contains exactly the remaining keyword arguments of this call
# (data settings plus model hyperparameters), so it is unpacked directly.
bnn_gl_rho_df, bnn_gl_rho_results = run_bnn_gl_ovb_rho_experiment(
    rho_values=rho_values,
    beta2=fixed_beta2,
    **bnn_gl_cfg,
    save_plots=True,
    results_dir=results_dir,
)

print("\nBNN GL - Rho Experiment Summary:")
display(bnn_gl_rho_df)

In [None]:
# BNN GL: sweep beta2 with rho held at fixed_rho.
# bnn_gl_cfg contains exactly the remaining keyword arguments of this call
# (data settings plus model hyperparameters), so it is unpacked directly.
bnn_gl_beta2_df, bnn_gl_beta2_results = run_bnn_gl_ovb_beta2_experiment(
    beta2_values=beta2_values,
    rho=fixed_rho,
    **bnn_gl_cfg,
    save_plots=True,
    results_dir=results_dir,
)

print("\nBNN GL - Beta2 Experiment Summary:")
display(bnn_gl_beta2_df)

---

# Summary Comparison Across Models

In [None]:
# Combine the rho-sweep summary tables of all six models into one long table.
# Each per-model table is left untouched: `assign` returns a new frame that
# carries an extra `model` column identifying where the rows came from.
rho_summaries = {
    'MC Dropout IT': mc_it_rho_df,
    'MC Dropout GL': mc_gl_rho_df,
    'Deep Ensemble IT': de_it_rho_df,
    'Deep Ensemble GL': de_gl_rho_df,
    'BNN IT': bnn_it_rho_df,
    'BNN GL': bnn_gl_rho_df,
}

all_rho_results = [
    summary.assign(model=model_name)
    for model_name, summary in rho_summaries.items()
]

# ignore_index=True gives the stacked table a fresh 0..n-1 row index
all_rho_df = pd.concat(all_rho_results, ignore_index=True)

print("Combined Rho Experiment Results:")
display(all_rho_df)

In [None]:
# Compare models on the rho sweep, restricted to the linear func_type:
# left panel = accuracy gain of the full model over the omitted model,
# right panel = ratio of mean AU in the omitted model to the full model.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax_acc, ax_au = axes

linear_df = all_rho_df.loc[all_rho_df['func_type'] == 'linear']
models = linear_df['model'].unique()
colors = plt.cm.tab10.colors

for i, model in enumerate(models):
    model_df = linear_df[linear_df['model'] == model]
    line_kwargs = dict(label=model, color=colors[i], markersize=8)

    acc_improvement = model_df['acc_full'] - model_df['acc_omitted']
    ax_acc.plot(model_df['rho'], acc_improvement, 'o-', **line_kwargs)

    # The small constant keeps the ratio finite when the full-model AU is zero
    au_inflation = model_df['au_mean_omitted'] / (model_df['au_mean_full'] + 1e-8)
    ax_au.plot(model_df['rho'], au_inflation, 'o-', **line_kwargs)

# (axis, reference level, y-label, title); the reference line marks
# "no difference" (0 for the accuracy gap, 1 for the AU ratio)
panels = [
    (ax_acc, 0, 'Accuracy Improvement (Full - Omitted)', 'Accuracy Gain from Including Z'),
    (ax_au, 1, 'AU Ratio (Omitted / Full)', 'AU Inflation from OVB'),
]
for ax, reference, ylabel, title in panels:
    ax.axhline(reference, color='black', linestyle='--', linewidth=0.5)
    ax.set_xlabel('rho (X-Z correlation)')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.suptitle('OVB Effect Across Models (Linear Function)', fontsize=14)
plt.tight_layout()
plt.savefig(results_dir / 'model_comparison_rho.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Closing message with the absolute location of the results directory
print(
    "\nExperiment Complete!",
    f"Results saved to: {results_dir.absolute()}",
    sep="\n",
)