In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from typing import Tuple
import sys
from pathlib import Path
from datetime import datetime
import os
import pyro

# Locate the repository root so the project packages imported further down
# (e.g. `Models`) can be found: when the kernel runs inside `Experiments/`,
# the root is one level up; otherwise the working directory is used as-is.
project_root = Path.cwd()
if project_root.name == 'Experiments':
    project_root = project_root.parent
sys.path.insert(0, str(project_root))

# Output layout: <root>/results/noise_level/{plots,statistics}
results_dir = project_root / "results" / "noise_level"
plots_dir = results_dir / "plots"
stats_dir = results_dir / "statistics"

# Create the tree; `exist_ok` keeps re-running this cell harmless.
results_dir.mkdir(parents=True, exist_ok=True)
plots_dir.mkdir(exist_ok=True)
stats_dir.mkdir(exist_ok=True)

print(f"Results will be saved to: {results_dir}")

# Import from Models folder
from Models.MC_Dropout import (
    MCDropoutRegressor,
    train_model,
    mc_dropout_predict,
    gaussian_nll,
    beta_nll,
    plot_toy_data,
    plot_uncertainties,
    normalize_x,
    normalize_x_data
)

from Models.Deep_Ensemble import (
    train_ensemble_deep,
    ensemble_predict_deep
)

from Models.BNN import (
    train_bnn,
    bnn_predict,
    normalize_x as bnn_normalize_x,
    normalize_x_data as bnn_normalize_x_data
)

from Models.BAMLSS import (
    fit_bamlss,
    bamlss_predict
)

from utils.device import get_device
from utils.plotting import plot_toy_data, plot_uncertainties_no_ood
import utils.results_save as results_save_module
from utils.results_save import save_plot, save_statistics, save_summary_text, save_summary_statistics

# Import helper functions for sample size experiments
from utils.sample_size_experiments import (
    run_mc_dropout_sample_size_experiment,
    run_deep_ensemble_sample_size_experiment,
    run_bnn_sample_size_experiment,
    run_bamlss_sample_size_experiment
)

# Import helper functions for noise level experiments
from utils.noise_level_experiments import (
    run_mc_dropout_noise_level_experiment,
    run_deep_ensemble_noise_level_experiment,
    run_bnn_noise_level_experiment,
    run_bamlss_noise_level_experiment
)

# Point utils.results_save at this notebook's output folders by overriding its
# module-level `plots_dir` / `stats_dir` attributes with the paths created above.
# NOTE(review): presumably save_plot / save_statistics look these attributes up
# when they are called - confirm in utils/results_save.py.
results_save_module.plots_dir = plots_dir
results_save_module.stats_dir = stats_dir


Error importing in API mode: ImportError('On Windows, cffi mode "ANY" is only "ABI".')
Trying to import in ABI mode.


Results will be saved to: c:\Users\lukas\OneDrive\Desktop\Code-Masterarbeit\A-statistical-evaluation-of-uncertainty-disentanglement-methods-1\results\noise_level
CUDA not available. Using CPU.
CUDA not available. Using CPU.


In [3]:
# Reproducibility: a single seed shared by PyTorch and by NumPy's global RNG
# (the data generator defined in this cell samples from the latter).
seed = 42
torch.manual_seed(seed)
np.random.seed(seed)

# ----- Data generation: 1-D toy regression with homo-/heteroscedastic noise -----
# f(x) = 0.7x + 0.5 (type='linear') or f(x) = x*sin(x) + x (type='sin')
# noise_type: 'homoscedastic' (σ(x) = tau) or 'heteroscedastic' (σ(x) = |tau * sin(0.5x + 5)|)
def generate_toy_regression(n_train=1000, train_range=(0.0, 10.0),
                            grid_points=1000, noise_type='heteroscedastic',
                            type="linear", tau=1, distribution="normal",
                            rng=None):
    """Generate a 1-D toy regression data set plus a noise-free evaluation grid.

    Training inputs are drawn uniformly from ``train_range``; targets are
    ``f(x) + eps`` where ``eps`` is zero-mean noise with scale ``sigma(x)``.

    Parameters
    ----------
    n_train : int
        Number of training points.
    train_range : tuple of float
        ``(low, high)`` interval for the training inputs and the grid.
    grid_points : int
        Number of equally spaced grid points over ``train_range``.
    noise_type : {'heteroscedastic', 'homoscedastic'}
        ``'homoscedastic'``: ``sigma(x) = tau``.
        ``'heteroscedastic'``: ``sigma(x) = |tau * sin(0.5 * x + 5)|``.
    type : {'linear', 'sin'}
        Clean function: ``0.7 * x + 0.5`` or ``x * sin(x) + x``.
        (The name shadows the builtin but is kept for caller compatibility.)
    tau : float
        Noise level, see ``noise_type``.
    distribution : {'normal', 'laplace'}
        Noise family. ``sigma(x)`` is passed as the *scale* parameter in both
        cases, so for ``'laplace'`` the noise standard deviation is
        ``sqrt(2) * sigma(x)``, not ``sigma(x)``.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. ``None`` (default) samples from NumPy's global
        RNG, i.e. the stream controlled by ``np.random.seed``.

    Returns
    -------
    x_train, y_train : float32 arrays of shape ``(n_train, 1)``
    x_grid, y_grid_clean : float32 arrays of shape ``(grid_points, 1)``

    Raises
    ------
    ValueError
        If ``type``, ``noise_type`` or ``distribution`` is not one of the
        supported values. No random numbers are drawn in that case.
    """
    # Validate every option *before* sampling, so a rejected call cannot
    # silently advance the RNG and change all data generated afterwards.
    if type == "linear":
        f_clean = lambda x: 0.7 * x + 0.5
    elif type == "sin":
        f_clean = lambda x: x * np.sin(x) + x
    else:
        raise ValueError("type must be 'linear', 'sin'")
    if noise_type not in ('homoscedastic', 'heteroscedastic'):
        raise ValueError("noise_type must be 'homoscedastic' or 'heteroscedastic'")
    if distribution not in ('normal', 'laplace'):
        raise ValueError("distribution must be 'normal' or 'laplace'")

    # The `np.random` module and Generator/RandomState objects expose the same
    # uniform/normal/laplace call signatures used below.
    sampler = np.random if rng is None else rng

    low, high = train_range
    x_train = sampler.uniform(low, high, size=(n_train, 1))
    y_clean_train = f_clean(x_train)

    # Per-sample noise scale sigma(x), same shape as x_train.
    if noise_type == 'homoscedastic':
        sigma_train = np.full_like(x_train, tau)
    else:
        sigma_train = np.abs(tau * np.sin(0.5 * x_train + 5))

    # Zero-mean noise with location 0 and scale sigma(x).
    if distribution == "normal":
        epsilon = sampler.normal(0.0, sigma_train, size=(n_train, 1))
    else:
        epsilon = sampler.laplace(0.0, sigma_train, size=(n_train, 1))
    y_train = y_clean_train + epsilon

    # Dense, noise-free evaluation grid over the training range.
    x_grid = np.linspace(low, high, grid_points).reshape(-1, 1)
    y_grid_clean = f_clean(x_grid)

    return (x_train.astype(np.float32), y_train.astype(np.float32),
            x_grid.astype(np.float32), y_grid_clean.astype(np.float32))

In [None]:
# Visual sanity check of the data generator: for each noise level, plot one
# homoscedastic and one heteroscedastic sample of the 'sin' target with
# Laplace noise. Nothing below depends on this cell's results; note that
# `tau_values` is redefined in the "Set parameters" cell before the experiments.
tau_values = [0.5, 1, 2, 2.5, 5, 10]

for tau in tau_values:
    # f(x) = x*sin(x) + x with homoscedastic noise (constant scale tau)
    x_train_homo, y_train_homo, x_grid_homo, y_clean_homo = generate_toy_regression(
        n_train=1000,
        train_range=(-5, 10),
        noise_type='homoscedastic',
        type="sin",
        tau=tau,
        distribution="laplace",
    )

    # Include tau in the title so the figures of the sweep can be told apart.
    plot_toy_data(x_train_homo, y_train_homo, x_grid_homo, y_clean_homo,
                  title=f"Toy Regression Data Homoscedastic (n=1000, tau={tau})")

    # f(x) = x*sin(x) + x with heteroscedastic noise (input-dependent scale)
    x_train, y_train, x_grid, y_clean = generate_toy_regression(
        n_train=1000,
        train_range=(-5, 10),
        noise_type='heteroscedastic',
        type="sin",
        tau=tau,
        distribution="laplace",
    )

    plot_toy_data(x_train, y_train, x_grid, y_clean,
                  title=f"Toy Regression Data Heteroscedastic (n=1000, tau={tau})")

## Set parameters

In [4]:
# Shared configuration for the experiment cells below.
tau_values = [0.5, 1, 2, 2.5, 5]   # noise levels for the MC Dropout / Deep Ensemble sweeps
tau_values_bnn = [0.5, 5]          # reduced set of noise levels used by the BAMLSS and BNN cells
distributions = ['normal']
function_types = ['linear', 'sin']
n_train = 1000
train_range = (-5, 10)
grid_points = 1500
seed = 42

# Reseed BOTH global RNGs here: generate_toy_regression samples from NumPy's
# global state, so seeding only torch would leave the generated data dependent
# on whether the optional preview cell above was executed first.
# np.random.seed() comes last because it returns None, so the cell no longer
# echoes the torch Generator repr as output.
torch.manual_seed(seed)
np.random.seed(seed)


<torch._C.Generator at 0x254db7244b0>

In [None]:
# Quick MC Dropout trial on a reduced sweep: linear target only, two noise
# levels, heteroscedastic noise. Everything else matches the full run below.
run_mc_dropout_noise_level_experiment(
    generate_toy_regression,
    # reduced sweep
    function_types=['linear'],
    tau_values=[0.5, 1],
    noise_type='heteroscedastic',
    # data settings taken from the parameter cell
    distributions=distributions,
    n_train=n_train,
    train_range=train_range,
    grid_points=grid_points,
    seed=seed,
    # same hyperparameter values as the full MC Dropout run
    beta=0.5,
    lr=1e-3,
    batch_size=32,
)

## MC Dropout

In [None]:
# Noise-level experiments for MC Dropout.
# Each call sweeps the tau values, distributions and function types from the
# parameter cell; the sweep runs once on heteroscedastic data, then once on
# homoscedastic data. Both runs take `function_types` from the shared config
# (previously the first call hard-coded the same list).
for noise_type in ('heteroscedastic', 'homoscedastic'):
    run_mc_dropout_noise_level_experiment(
        generate_toy_regression,
        function_types=function_types,
        noise_type=noise_type,
        tau_values=tau_values,
        distributions=distributions,
        n_train=n_train,
        train_range=train_range,
        grid_points=grid_points,
        seed=seed,
        beta=0.5,
        lr=1e-3,
        batch_size=32,
    )

## Deep Ensemble

In [None]:
# Deep Ensemble noise-level sweep: heteroscedastic data first, then
# homoscedastic, with otherwise identical settings from the parameter cell.
for noise_type in ('heteroscedastic', 'homoscedastic'):
    run_deep_ensemble_noise_level_experiment(
        generate_toy_regression,
        function_types=function_types,
        noise_type=noise_type,
        tau_values=tau_values,
        distributions=distributions,
        n_train=n_train,
        train_range=train_range,
        grid_points=grid_points,
        seed=seed,
        beta=0.5,
        batch_size=32,
    )

## BAMLSS

In [None]:

# BAMLSS noise-level sweep: heteroscedastic data first, then homoscedastic.
# Both runs use the reduced `tau_values_bnn` noise levels and the same
# sampler settings.
for noise_type in ('heteroscedastic', 'homoscedastic'):
    run_bamlss_noise_level_experiment(
        generate_toy_regression,
        function_types=function_types,
        noise_type=noise_type,
        tau_values=tau_values_bnn,
        distributions=distributions,
        n_train=n_train,
        train_range=train_range,
        grid_points=grid_points,
        seed=seed,
        n_iter=12000,
        burnin=2000,
        thin=10,
        nsamples=1000,
    )



################################################################################
# Function Type: Linear (linear) - Distribution: normal - BAMLSS
################################################################################

Using CPU parallelization with 2 workers (BAMLSS is CPU-only)


## BNN

In [None]:
# BNN noise-level sweep: heteroscedastic data first, then homoscedastic.
# Data settings now come from the parameter cell instead of being re-typed
# here, so editing the shared configuration also affects the BNN runs
# (the values are identical to the previously hard-coded ones).
for noise_type in ('heteroscedastic', 'homoscedastic'):
    run_bnn_noise_level_experiment(
        generate_toy_regression,
        function_types=function_types,
        noise_type=noise_type,
        tau_values=tau_values_bnn,
        distributions=distributions,
        n_train=n_train,
        train_range=train_range,
        # NOTE(review): kept at 1000 on purpose - the other methods use the
        # shared `grid_points` (1500). Confirm whether the coarser grid is
        # intentional before switching to the shared value.
        grid_points=1000,
        seed=seed,
        hidden_width=16,
        weight_scale=1.0,
        warmup=500,
        samples=500,
        chains=4,
    )