# Classification Label Noise Experiments

This notebook runs label noise experiments for 2D classification with IT and GL uncertainty decompositions.

The experiment evaluates how uncertainty estimates behave as the label noise rate (eta) increases.

**Label Noise (eta)**: Fraction of training labels that are randomly flipped to a different class.

- `eta = 0.0`: No label noise (clean labels)
- `eta = 0.1`: 10% of labels randomly flipped
- `eta = 0.3`: 30% of labels randomly flipped

**Expected Behavior**:
- Higher eta should increase aleatoric uncertainty (labels become unreliable everywhere)
- Unlike class overlap (controlled by the `rcd` parameter), label noise affects all regions of the input space uniformly

Models tested:
- MC Dropout (IT and GL)
- Deep Ensemble (IT and GL)
- BNN (IT and GL)

## Packages

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import sys
from pathlib import Path

# Resolve the project root: the parent folder when the notebook is launched
# from an 'Experiments' directory, otherwise the working directory itself.
cwd = Path.cwd()
project_root = cwd.parent if cwd.name == 'Experiments' else cwd
# Put the project root first on sys.path so the local `utils` package resolves.
sys.path.insert(0, str(project_root))

# Results tree: results/classification/label_noise/{plots,statistics,outputs}
results_dir = project_root.joinpath("results", "classification", "label_noise")
results_dir.mkdir(parents=True, exist_ok=True)
plots_dir = results_dir / "plots"
stats_dir = results_dir / "statistics"
outputs_dir = results_dir / "outputs"
# The parent already exists at this point, so no `parents=True` is needed.
for subdir in (plots_dir, stats_dir, outputs_dir):
    subdir.mkdir(exist_ok=True)

print(f"Results will be saved to: {results_dir}")

# Import classification experiment utilities
from utils.classification_data import simulate_dataset
from utils.classification_experiments import (
    run_mc_dropout_it_label_noise_experiment,
    run_mc_dropout_gl_label_noise_experiment,
    run_deep_ensemble_it_label_noise_experiment,
    run_deep_ensemble_gl_label_noise_experiment,
    run_bnn_it_label_noise_experiment,
    run_bnn_gl_label_noise_experiment,
)
from utils.device import get_device
import utils.results_save as results_save_module

# Set module-level directories
# Overwrite the `plots_dir`, `stats_dir` and `outputs_dir` attributes of
# utils.results_save with this experiment's folders.
# NOTE(review): presumably the save helpers read these module globals when
# writing plots/statistics/outputs — confirm against utils/results_save.py.
results_save_module.plots_dir = plots_dir
results_save_module.stats_dir = stats_dir
results_save_module.outputs_dir = outputs_dir

## Device Setup

In [None]:
# Ask the project helper (utils.device.get_device) which compute device to use
# and report it. Presumably this prefers an accelerator when one is available
# and falls back to CPU — TODO confirm in utils/device.py.
# NOTE(review): in the cells shown here `device` is only printed; it is not
# passed explicitly to any of the experiment runners.
device = get_device()
print(f"Using device: {device}")

## Data Generation Setup

Configure the base dataset (Gaussian blobs with 3 classes).

We use well-separated classes (rcd=3.0) so that any uncertainty increase is due to label noise, not class overlap.

In [None]:
# Reproducibility: seed both NumPy's legacy global RNG and PyTorch.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# Base configuration for Gaussian blob classification.
# Built with keyword arguments; key order matches the original literal.
base_cfg = dict(
    # --- Dataset ---
    N_train=1000,
    N_test=500,
    num_classes=3,
    rcd=3.0,            # Well-separated classes (fixed)
    blob_sigma=0.25,    # Controls blob overlap
    tau=0.2,
    eta=0.0,            # Will be varied in label noise sweep
    sigma_in=0.0,       # No input noise
    seed=seed,
    # --- Model hyperparameters (shared) ---
    input_dim=2,
    epochs=300,
    batch_size=32,
    lr=1e-3,
    dropout_p=0.25,
    mc_samples=50,
    gl_samples=100,
    K=5,                # Ensemble size
    hidden_width=32,
    weight_scale=1.0,
    warmup=200,
    samples=200,
    chains=1,
)

# Visualize effect of label noise
# One scatter panel per example noise rate. Panel count, figure width and the
# number of classes drawn all follow the configuration instead of being
# hard-coded, so changing `eta_examples` or base_cfg["num_classes"] keeps the
# figure consistent.
eta_examples = [0.0, 0.1, 0.2, 0.3]
num_classes = base_cfg["num_classes"]

# Matplotlib "tab" palette; cycled if there are more classes than entries.
palette = [
    'tab:blue', 'tab:orange', 'tab:green', 'tab:red', 'tab:purple',
    'tab:brown', 'tab:pink', 'tab:gray', 'tab:olive', 'tab:cyan',
]
colors = [palette[c % len(palette)] for c in range(num_classes)]

# squeeze=False always yields a 2-D array of Axes (even for a single panel);
# ravel() flattens it to the 1-D sequence the loop below iterates over.
fig, axes = plt.subplots(
    1, len(eta_examples), figsize=(4 * len(eta_examples), 4), squeeze=False
)
axes = axes.ravel()

for panel_idx, (ax, eta) in enumerate(zip(axes, eta_examples)):
    # Work on a copy so base_cfg keeps its own eta for the experiment sweeps.
    cfg = dict(base_cfg)
    cfg["eta"] = eta
    X_train, y_train, X_test, y_test, meta = simulate_dataset(cfg)

    # Colour each training point by its (possibly noisy) label.
    for c in range(num_classes):
        mask = y_train == c
        ax.scatter(X_train[mask, 0], X_train[mask, 1], c=colors[c], alpha=0.5, label=f'Class {c}', s=10)
    ax.scatter(meta['centers'][:, 0], meta['centers'][:, 1], c='black', marker='*', s=200, label='Centers')
    ax.set_title(f'eta = {eta} (Label Noise Rate)')
    ax.set_xlabel('x1')
    ax.set_ylabel('x2')
    ax.axis('equal')
    ax.grid(True, alpha=0.3)
    # Legend on the first panel only; keyed on position rather than on the
    # float comparison `eta == 0.0` so it survives edits to eta_examples.
    if panel_idx == 0:
        ax.legend(loc='upper right', fontsize=8)

plt.suptitle('Effect of Label Noise on Training Data', fontsize=14)
plt.tight_layout()
plt.show()

print("\nNote: With label noise, some points appear in 'wrong' class colors.")
print("This simulates annotation errors in real-world datasets.")

## Set Parameters

In [None]:
# Noise rates swept by the MC Dropout and Deep Ensemble experiments.
eta_values = [0.0, 0.1, 0.2, 0.3]

# BNN runs are costly, so they use every other rate from the grid above:
# [0.0, 0.2].
eta_values_bnn = eta_values[::2]

print(f"Label noise rates (eta): {eta_values}")
print(f"Label noise rates (BNN): {eta_values_bnn}")

## MC Dropout IT

In [None]:
# Run the MC Dropout experiment with the IT decomposition for every noise
# rate in `eta_values`, sharing the hyperparameters in `base_cfg`.
# The runner comes from utils.classification_experiments; the structure of its
# return value is not visible from this notebook — TODO document.
results_mc_dropout_it = run_mc_dropout_it_label_noise_experiment(
    base_cfg=base_cfg,
    eta_values=eta_values,
    seed=seed,
)

## MC Dropout GL

In [None]:
# Run the MC Dropout experiment with the GL decomposition for every noise
# rate in `eta_values`, sharing the hyperparameters in `base_cfg`.
# The runner comes from utils.classification_experiments; the structure of its
# return value is not visible from this notebook — TODO document.
results_mc_dropout_gl = run_mc_dropout_gl_label_noise_experiment(
    base_cfg=base_cfg,
    eta_values=eta_values,
    seed=seed,
)

## Deep Ensemble IT

In [None]:
# Run the Deep Ensemble experiment with the IT decomposition for every noise
# rate in `eta_values`. Presumably the ensemble size is taken from
# base_cfg["K"] — confirm in utils/classification_experiments.py.
results_deep_ensemble_it = run_deep_ensemble_it_label_noise_experiment(
    base_cfg=base_cfg,
    eta_values=eta_values,
    seed=seed,
)

## Deep Ensemble GL

In [None]:
# Run the Deep Ensemble experiment with the GL decomposition for every noise
# rate in `eta_values`. Presumably the ensemble size is taken from
# base_cfg["K"] — confirm in utils/classification_experiments.py.
results_deep_ensemble_gl = run_deep_ensemble_gl_label_noise_experiment(
    base_cfg=base_cfg,
    eta_values=eta_values,
    seed=seed,
)

## BNN IT

In [None]:
# BNN is computationally expensive, so we use fewer eta values:
# this sweep runs over `eta_values_bnn` rather than the full `eta_values` grid.
# Runs the BNN experiment with the IT decomposition; presumably the sampler
# settings come from base_cfg ("warmup", "samples", "chains") — TODO confirm.
results_bnn_it = run_bnn_it_label_noise_experiment(
    base_cfg=base_cfg,
    eta_values=eta_values_bnn,
    seed=seed,
)

## BNN GL

In [None]:
# BNN is computationally expensive, so we use fewer eta values:
# this sweep runs over `eta_values_bnn` rather than the full `eta_values` grid.
# Runs the BNN experiment with the GL decomposition; presumably the sampler
# settings come from base_cfg ("warmup", "samples", "chains") — TODO confirm.
results_bnn_gl = run_bnn_gl_label_noise_experiment(
    base_cfg=base_cfg,
    eta_values=eta_values_bnn,
    seed=seed,
)