In [None]:
import sys, os; sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__) if '__file__' in globals() else os.getcwd(), '..')))
#import os; os.chdir(os.path.dirname(os.getcwd()))
from utils.model_loader import get_model_fits
import numpy as np
import pandas as pd
import re
from sklearn.metrics import mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
# Directory that is scanned for dataset archives (one .npz file per configuration).
data_dir = "datasets/friedman_correlated"
#results_dir_relu = "results/regression/single_layer/relu/friedman"
results_dir_tanh = "results/regression/single_layer/tanh/friedman_correlated"

#model_names_relu = ["Gaussian", "Regularized Horseshoe", "Dirichlet Horseshoe", "Dirichlet Student T"]
model_names_tanh = ["Gaussian tanh", "Regularized Horseshoe tanh", "Dirichlet Horseshoe tanh", "Dirichlet Student T tanh", "Pred CP tanh"]


#relu_fits = {}
# Maps each dataset configuration name to the value returned by get_model_fits for it.
tanh_fits = {}

files = sorted(f for f in os.listdir(data_dir) if f.endswith(".npz"))
for fname in files:
    # Drop only the trailing extension. str.replace would also remove any
    # ".npz" occurring inside the name and silently change the lookup key.
    base_config_name = os.path.splitext(fname)[0]  # e.g., "GAM_N100_p8_sigma1.00_seed1"
    # The config handed to the loader is the bare name; no sub-directory is prepended.
    full_config_path = base_config_name
    # relu_fit = get_model_fits(
    #     config=full_config_path,
    #     results_dir=results_dir_relu,
    #     models=model_names_relu,
    #     include_prior=False,
    # )

    tanh_fit = get_model_fits(
        config=full_config_path,
        results_dir=results_dir_tanh,
        models=model_names_tanh,
        include_prior=False,
    )

    #relu_fits[base_config_name] = relu_fit  # use clean key
    tanh_fits[base_config_name] = tanh_fit  # keyed by the extension-free file name
    


In [None]:
import numpy as np
import pandas as pd
from scipy.special import logsumexp
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# --- Inputs: held-out split of the single dataset analysed in this cell ---
path = "datasets/friedman_correlated/Friedman_N500_p10_sigma1.00_seed11.npz"
data = np.load(path)
X_test = np.asarray(data["X_test"])
# Flatten the targets to 1-D so that a column vector (n, 1) cannot broadcast
# against the (S, n) arrays of posterior draws used further down.
y_test = np.asarray(data["y_test"]).reshape(-1)
# Per-model summary records; filled by the model loop later in this cell.
rows = []


def compute_ppll(y_test, mu_draws, sigma_draws):
    """
    Log posterior predictive density of held-out targets under a Gaussian
    likelihood with one noise scale per posterior draw.

    y_test: (n,) observed targets; an (n, 1) column is flattened to (n,)
    mu_draws: (S, n) predictive means per draw; trailing axes are flattened,
              so (S, n, 1) is accepted as well
    sigma_draws: (S,) homoskedastic noise per draw; flattened to 1-D
    returns:
      lppd_n: (n,) per-point log posterior predictive density
      ppll_total: float, sum of lppd_n
      ppll_mean: float, mean of lppd_n
    raises:
      ValueError if mu_draws has fewer than two dimensions or if the number of
      targets does not match the number of predicted points
    """
    # Coerce shapes up front: without this, column-vector inputs broadcast into
    # higher-rank arrays and yield results of the wrong shape without any error.
    y_test = np.asarray(y_test).reshape(-1)
    mu_draws = np.asarray(mu_draws)
    if mu_draws.ndim < 2:
        raise ValueError("mu_draws must have shape (S, n); got %r" % (mu_draws.shape,))
    mu_draws = mu_draws.reshape(mu_draws.shape[0], -1)
    sigma_draws = np.asarray(sigma_draws).reshape(-1)

    S, n = mu_draws.shape
    if y_test.shape[0] != n:
        raise ValueError(
            "y_test has %d entries but mu_draws predicts %d points" % (y_test.shape[0], n)
        )

    # log p(y_n | draw s) for Gaussian: -0.5*log(2π) - log σ_s - 0.5*((y - μ_s)^2/σ_s^2)
    resid2 = (mu_draws - y_test[None, :])**2                   # (S, n)
    loglik_sn = (
        -0.5 * np.log(2 * np.pi)
        - np.log(sigma_draws)[:, None]
        - 0.5 * resid2 / (sigma_draws[:, None] ** 2)
    )  # (S, n)
    # lppd_n = log( (1/S) * Σ_s exp(loglik_sn) ) = logsumexp - log S
    # logsumexp keeps the average over draws stable when densities underflow.
    lppd_n = logsumexp(loglik_sn, axis=0) - np.log(S)
    return lppd_n, float(lppd_n.sum()), float(lppd_n.mean())

def predictive_mean_and_ci(mu_draws, sigma_draws, alpha=0.05):
    """
    Normal approximation to the posterior predictive via total variance:
      E[Y|x] ≈ mean_s mu_s
      Var[Y|x] ≈ mean_s (sigma_s^2 + (mu_s - E[mu])^2)

    mu_draws: (S, n) predictive means per draw
    sigma_draws: (S,) noise scale per draw
    alpha: miscoverage level of the two-sided interval; the band is
           mean ± z_{1-alpha/2} * std (alpha=0.05 gives the ~95% band)
    returns:
      y_mean: (n,), y_std: (n,), (low, high) interval bounds, each (n,)
    raises:
      ValueError if alpha is not strictly between 0 and 1
    """
    # Local import: the standard-normal quantile function is only needed here.
    from scipy.special import ndtri

    if not 0.0 < alpha < 1.0:
        raise ValueError("alpha must lie strictly between 0 and 1; got %r" % (alpha,))

    y_mean = mu_draws.mean(axis=0)  # (n,)
    # Per-draw noise variance plus squared deviation of the draw's mean from
    # the overall mean; averaging over draws gives the total variance.
    var_total = (sigma_draws**2)[:, None] + (mu_draws - y_mean[None, :])**2
    y_var = var_total.mean(axis=0)
    y_std = np.sqrt(y_var)
    # Two-sided critical value derived from alpha (previously fixed at the 95% value).
    z = float(ndtri(1.0 - alpha / 2.0))
    return y_mean, y_std, (y_mean - z * y_std, y_mean + z * y_std)

def smoothness_along_pc1(X, y_mean):
    """
    Roughness of the predictions once the inputs are ordered by their score
    on the first principal component of X.

    Returns:
      the PC1 scores in ascending order,
      y_mean rearranged into that same order,
      the mean absolute difference between consecutive reordered predictions
      (a simple total-variation style roughness score).
    """
    # Fit a one-component PCA on X and score the same points with it.
    pc1_scores = PCA(n_components=1).fit(X).transform(X).ravel()
    ascending = np.argsort(pc1_scores)
    preds_in_order = y_mean[ascending]
    # Average absolute jump between neighbouring points along PC1.
    jumps = np.abs(np.diff(preds_in_order))
    roughness = float(np.mean(jumps))
    return pc1_scores[ascending], preds_in_order, roughness

dataset_key = "Friedman_N500_p10_sigma1.00_seed11"

# Ordering of the test points by PC1 score. It depends only on X_test, so it is
# computed once here instead of refitting the PCA for every model in the loop.
order = np.argsort(PCA(n_components=1).fit(X_test).transform(X_test).ravel())

for model_name, model_entry in tanh_fits[dataset_key].items():
    post = model_entry["posterior"]

    # Predictive means (S, n_test). You already store 'output_test' as deterministic means.
    # NOTE(review): squeeze(-1) requires the last axis of "output_test" to have
    # length 1 — confirm against the Stan model.
    mu_draws = post.stan_variable("output_test").squeeze(-1)  # (S, n)
    sigma_draws = post.stan_variable("sigma").reshape(-1)     # (S,)

    # ---- Metrics: PPLL ----
    lppd_n, ppll_total, ppll_mean = compute_ppll(y_test, mu_draws, sigma_draws)

    # For reference: posterior mean predictions (also used in smoothness)
    y_mean, y_std, (ci_lo, ci_hi) = predictive_mean_and_ci(mu_draws, sigma_draws)

    # ---- Smoothness proxy: variation along PC1 of X_test ----
    z_sorted, y_sorted, tv = smoothness_along_pc1(X_test, y_mean)

    rows.append({
        "Model": model_name,
        "PPLL_total": ppll_total,
        "PPLL_mean": ppll_mean,
        "TV_PC1": tv,  # lower is smoother along dominant direction
        "n_draws": mu_draws.shape[0]
    })

    # ---- Minimal visuals ----
    fig, axes = plt.subplots(1, 2, figsize=(11, 4))

    # (A) Per-point log posterior predictive density distribution
    axes[0].hist(lppd_n, bins=30, edgecolor="k")
    axes[0].set_title(f"{model_name} — per-point log p(y|x,data)")
    axes[0].set_xlabel("lppd (per point)")
    axes[0].set_ylabel("count")

    # (B) Function profile along PC1 with ~95% predictive band (Normal-mixture approx)
    # The band is reordered with the precomputed PC1 ordering so it lines up with z_sorted.
    axes[1].plot(z_sorted, y_sorted, lw=2, label="posterior mean")
    axes[1].fill_between(z_sorted, ci_lo[order], ci_hi[order], alpha=0.2, label="~95% pred. band")
    axes[1].set_title(f"{model_name} — profile along PC1 (TV={tv:.3g})")
    axes[1].set_xlabel("PC1 score (X_test)")
    axes[1].set_ylabel("prediction")
    axes[1].legend()

    plt.tight_layout()
    plt.show()
    # Release the figure once it has been rendered so figures do not pile up
    # in memory across models.
    plt.close(fig)

# One row per model, best (highest) mean log predictive density first.
results_ppll_df = pd.DataFrame(rows).sort_values("PPLL_mean", ascending=False)
print(results_ppll_df)


In [None]:
from sklearn.metrics import mean_squared_error
from properscoring import crps_ensemble
import numpy as np
import pandas as pd

path = "datasets/friedman_correlated/Friedman_N500_p10_sigma1.00_seed11.npz"
data = np.load(path)
X_test = np.asarray(data["X_test"])
# Flatten the targets to 1-D: the per-draw RMSE below broadcasts
# y_test[None, :] against (S, n_test) draws, which needs a plain (n_test,) vector.
y_test = np.asarray(data["y_test"]).reshape(-1)
rows = []
for model_name, model_entry in tanh_fits['Friedman_N500_p10_sigma1.00_seed11'].items():
    post = model_entry["posterior"]

    # (S, n_test)
    # NOTE(review): squeeze(-1) requires the last axis of "output_test" to have
    # length 1 — confirm against the Stan model.
    y_samps = post.stan_variable("output_test").squeeze(-1)

    # Posterior-mean predictions and RMSE
    y_mean = y_samps.mean(axis=0)                                   # (n_test,)
    rmse_post_mean = float(np.sqrt(mean_squared_error(y_test, y_mean)))

    # Per-draw RMSEs and their mean
    per_draw_rmse = np.sqrt(((y_samps - y_test[None, :])**2).mean(axis=1))  # (S,)
    rmse_draw_mean = float(per_draw_rmse.mean())

    # CRPS across the ensemble (expects shape (n_test, S))
    # NOTE(review): the ensemble here is the raw "output_test" draws; no "sigma"
    # noise is added in this cell — confirm this is the intended CRPS target.
    crps = float(np.mean(crps_ensemble(y_test, y_samps.T)))

    rows.append({
        "Model": model_name,
        "RMSE_posterior_mean": rmse_post_mean,
        "RMSE_mean_over_draws": rmse_draw_mean,
        "CRPS": crps,
        "n_draws": y_samps.shape[0]
    })

# One row per model, lowest posterior-mean RMSE first.
results_df = pd.DataFrame(rows).sort_values("RMSE_posterior_mean")
print(results_df)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.special import logsumexp
from sklearn.decomposition import PCA

# -------------------------------------------------
# Configuration and held-out data for this cell
# -------------------------------------------------
# Which dataset archive to analyse; the file path is derived from this key.
dataset_key = "Friedman_N500_p10_sigma1.00_seed11"  # adjust if needed
path = f"datasets/friedman_correlated/{dataset_key}.npz"

# Seeded generator, passed to predictive_samples in the model loop below.
rng = np.random.default_rng(12345)

data = np.load(path)
X_test = np.asarray(data["X_test"])
# Targets flattened to a 1-D vector of length n.
y_test = np.ravel(data["y_test"])

# -------------------------------------------------
# Utilities
# -------------------------------------------------
def _to_Sn(a):
    """Ensure array is (S, n): flatten any trailing dims, add leading dim if needed."""
    a = np.asarray(a)
    if a.ndim == 1:
        return a[None, :]
    return a.reshape(a.shape[0], -1)

def predictive_samples(mu_draws, sigma_draws, rng, S_pred=None):
    """
    Draw noisy predictive samples: each retained draw's means plus Gaussian
    noise scaled by that draw's sigma.

    mu_draws: per-draw means, coerced to (S, n) by _to_Sn
    sigma_draws: per-draw noise scales, flattened to 1-D
    rng: generator providing `standard_normal` and `choice`
    S_pred: number of draws to keep. When None or >= S every draw is used;
            otherwise S_pred draws are picked without replacement.
    returns: array with one row per retained draw and n columns
    """
    means = _to_Sn(mu_draws)     # (S, n)
    scales = np.asarray(sigma_draws).reshape(-1)
    total_draws, n_points = means.shape

    use_every_draw = S_pred is None or S_pred >= total_draws
    if not use_every_draw:
        # Subsample first, then generate noise, so the generator is consumed
        # in the order: choice, standard_normal.
        keep = rng.choice(total_draws, size=S_pred, replace=False)
        noise = rng.standard_normal(size=(S_pred, n_points))
        return means[keep] + scales[keep, None] * noise

    noise = rng.standard_normal(size=(total_draws, n_points))
    return means + scales[:, None] * noise

def coverage_and_width(y_test, y_pred, levels=(0.5, 0.8, 0.95)):
    """
    Empirical coverage and mean width of central predictive intervals.

    y_test: (n,) observed targets; flattened, so an (n, 1) column is accepted
    y_pred: predictive samples, coerced to (S, n) by _to_Sn
    levels: nominal central-interval levels in (0, 1)
    returns: dict with, per level, "cov_<pct>" (fraction of targets inside the
             interval) and "width_<pct>" (mean interval width), where <pct> is
             the level as a rounded percentage
    """
    y_pred = _to_Sn(y_pred)
    # A column-vector target would broadcast against the (n,) bounds into an
    # (n, n) comparison and give a meaningless coverage; flatten it first.
    y_test = np.asarray(y_test).reshape(-1)
    out = {}
    for lev in levels:
        alpha = (1.0 - lev) / 2.0
        # Both tail quantiles from a single call; rows are (lower, upper).
        lo, hi = np.quantile(y_pred, [alpha, 1 - alpha], axis=0)
        cov = float(np.mean((y_test >= lo) & (y_test <= hi)))
        width = float(np.mean(hi - lo))
        # round() rather than int(): int(0.29 * 100) truncates to 28 because
        # 0.29 * 100 evaluates to 28.999999999999996 in floating point.
        pct = int(round(lev * 100))
        out[f"cov_{pct}"] = cov
        out[f"width_{pct}"] = width
    return out

def predictive_mean_std(mu_draws, sigma_draws):
    """
    Predictive mean and standard deviation per test point.

    The variance is the average of the squared per-draw noise scales plus the
    (population, ddof=0) variance of the per-draw means across draws.

    mu_draws: per-draw means, coerced to (S, n) by _to_Sn
    sigma_draws: per-draw noise scales, flattened to 1-D
    returns: (mean, std), each with one entry per test point
    """
    means = _to_Sn(mu_draws)
    noise_var = np.asarray(sigma_draws).reshape(-1) ** 2
    # Average noise variance over draws; shape (1,), broadcast over points below.
    avg_noise_var = noise_var[:, None].mean(axis=0)
    spread_of_means = means.var(axis=0, ddof=0)
    total_var = avg_noise_var + spread_of_means
    return means.mean(axis=0), np.sqrt(total_var)

def pit_values(y_test, y_pred):
    """
    Probability integral transform per test point: the fraction of predictive
    samples that are less than or equal to the observed target.

    y_test: (n,) observed targets
    y_pred: predictive samples, coerced to (S, n) by _to_Sn
    returns: (n,) array of fractions in [0, 1]
    """
    samples = _to_Sn(y_pred)
    at_or_below = samples <= y_test[None, :]   # (S, n) boolean
    return at_or_below.mean(axis=0)            # (n,)

def quantile_calibration(y_test, y_pred, q_grid=None, debug=False, tag=""):
    """
    Compare nominal predictive quantile levels with the empirical frequency of
    targets falling at or below the corresponding predictive quantile.

    y_test: (n,) observed targets (any array-like with n elements)
    y_pred: (S, n) or (S, n, 1) predictive samples
    q_grid: nominal quantile levels; defaults to 19 evenly spaced levels from
            0.05 to 0.95. Lists and scalars are converted to a 1-D array.
    debug: when True, print the shapes of the intermediate arrays
    tag: label placed in front of each debug line
    Returns (q_grid (Q,), emp (Q,), ece (float), max_ece (float)) where
      emp     is the empirical frequency of y <= predictive quantile per level,
      ece     is the mean absolute gap |emp - q_grid|,
      max_ece is the largest absolute gap.
    """
    y_pred = _to_Sn(y_pred)                         # (S, n)
    # Accept plain sequences: the reshape and the debug prints below need arrays.
    y_test = np.asarray(y_test)
    n = y_pred.shape[1]
    if q_grid is None:
        q_grid = np.linspace(0.05, 0.95, 19)        # (Q,)
    else:
        q_grid = np.asarray(q_grid, dtype=float).reshape(-1)  # (Q,)

    # Predictive quantiles per x: (Q, n)
    # IMPORTANT: pass q_grid as 1-D → output shape (Q, n)
    qtile = np.quantile(y_pred, q_grid, axis=0)     # (Q, n)

    # Explicit broadcast y_test to (Q, n), then compare
    y_ref = np.broadcast_to(y_test.reshape(1, n), qtile.shape)  # (Q, n)
    comp = (y_ref <= qtile)                                     # (Q, n)

    if debug:
        print(f"[{tag}] y_test.shape  = {y_test.shape}")
        print(f"[{tag}] y_pred.shape  = {y_pred.shape}")
        print(f"[{tag}] q_grid.shape  = {q_grid.shape}")
        print(f"[{tag}] qtile.shape   = {qtile.shape}")
        print(f"[{tag}] comp.shape    = {comp.shape}")

    emp = comp.mean(axis=1)                          # (Q,)
    gap = np.abs(emp - q_grid)
    ece = float(np.mean(gap))
    max_ece = float(np.max(gap))
    return q_grid, emp, ece, max_ece

def compute_ppll(y_test, mu_draws, sigma_draws):
    """
    Per-point log posterior predictive density under a Gaussian likelihood
    with one noise scale per draw, averaged over draws in log space.

    y_test: (n,) observed targets
    mu_draws: per-draw means, coerced to (S, n) by _to_Sn
    sigma_draws: per-draw noise scales, flattened to 1-D
    returns: (lppd_n (n,), sum of lppd_n as float, mean of lppd_n as float)
    """
    means = _to_Sn(mu_draws)
    scales = np.asarray(sigma_draws).reshape(-1)
    n_draws = means.shape[0]

    sq_err = (means - y_test[None, :])**2  # (S, n)
    # Gaussian log density of every target under every draw.
    log_dens = (
        -0.5 * np.log(2 * np.pi)
        - np.log(scales)[:, None]
        - 0.5 * sq_err / (scales[:, None] ** 2)
    )  # (S, n)
    # log of the draw-averaged density: logsumexp over draws minus log S.
    lppd_n = logsumexp(log_dens, axis=0) - np.log(n_draws)  # (n,)
    total = float(lppd_n.sum())
    average = float(lppd_n.mean())
    return lppd_n, total, average

def smoothness_along_pc1(X, y_mean):
    """
    Order the points by their first-principal-component score and measure how
    much the predictions jump between neighbours.

    returns: (sorted PC1 scores, y_mean in that order,
              mean absolute difference of consecutive reordered predictions)
    """
    projector = PCA(n_components=1).fit(X)
    scores = projector.transform(X).ravel()
    ranking = np.argsort(scores)
    y_by_score = y_mean[ranking]
    steps = np.diff(y_by_score)
    tv = float(np.mean(np.abs(steps)))
    return scores[ranking], y_by_score, tv

# -------------------------------------------------
# Loop models: PPLL + UQ diagnostics
# -------------------------------------------------
rows = []
# Shape-debug toggle: set to True to print intermediate shapes for the first
# model only (it is switched off again after the first iteration). Left off by default.
first = False

# Ordering of the test points by PC1 score. It depends only on X_test, so it is
# computed once here instead of refitting the PCA for every model in the loop.
order = np.argsort(PCA(n_components=1).fit(X_test).transform(X_test).ravel())

for model_name, model_entry in tanh_fits[dataset_key].items():
    post = model_entry["posterior"]

    # _to_Sn alone yields (S, n). An extra bare .squeeze() would collapse an
    # (S, 1) array to (S,), which _to_Sn then reads as one draw of S points.
    mu_draws = _to_Sn(post.stan_variable("output_test"))  # enforce (S, n)
    sigma_draws = np.asarray(post.stan_variable("sigma")).reshape(-1)  # (S,)

    # -------- PPLL --------
    lppd_n, ppll_total, ppll_mean = compute_ppll(y_test, mu_draws, sigma_draws)

    # -------- Smoothness (PC1) --------
    y_mean, y_std = predictive_mean_std(mu_draws, sigma_draws)
    z_sorted, y_sorted, tv_pc1 = smoothness_along_pc1(X_test, y_mean)

    # -------- Predictive samples for UQ --------
    y_pred = predictive_samples(mu_draws, sigma_draws, rng)  # (S, n)

    # Coverage & widths
    cov_info = coverage_and_width(y_test, y_pred, levels=(0.5, 0.8, 0.95))

    # PIT + quantile calibration (shapes are printed only when `first` is True)
    pit = pit_values(y_test, y_pred)  # (n,)
    q_grid, emp, ece, max_ece = quantile_calibration(y_test, y_pred, debug=first, tag=model_name)
    first = False

    rows.append({
        "Model": model_name,
        "PPLL_total": ppll_total,
        "PPLL_mean": ppll_mean,
        "TV_PC1": tv_pc1,
        **cov_info,
        "sharpness_mean_sd": float(y_std.mean()),
        "ECE_quantile": ece,
        "max_ECE": max_ece,
        "n_draws": mu_draws.shape[0],
    })

    # -------- Minimal visuals per model --------
    fig, axes = plt.subplots(1, 4, figsize=(17, 3.8))

    # (A) Per-point lppd histogram
    axes[0].hist(lppd_n, bins=30, edgecolor="k")
    axes[0].set_title(f"{model_name} — per-point log p(y|x,data)")
    axes[0].set_xlabel("lppd (per point)")
    axes[0].set_ylabel("count")

    # (B) Function profile along PC1 with ~95% predictive band (Normal-mixture approx)
    # The band is reordered with the precomputed PC1 ordering so it lines up with z_sorted.
    z_plot = z_sorted
    ci_lo = y_mean - 1.96 * y_std
    ci_hi = y_mean + 1.96 * y_std
    axes[1].plot(z_plot, y_sorted, lw=2, label="posterior mean")
    axes[1].fill_between(z_plot, ci_lo[order], ci_hi[order], alpha=0.2, label="~95% pred. band")
    axes[1].set_title(f"{model_name} — profile PC1 (TV={tv_pc1:.3g})")
    axes[1].set_xlabel("PC1 score (X_test)")
    axes[1].set_ylabel("prediction")
    axes[1].legend()

    # (C) PIT histogram
    axes[2].hist(pit, bins=20, range=(0, 1), edgecolor="k")
    # Dashed line: expected count per bin if the PIT values were uniform.
    axes[2].axhline(len(pit) / 20, ls="--")
    axes[2].set_title(f"{model_name} — PIT")
    axes[2].set_xlabel("PIT")
    axes[2].set_ylabel("count")

    # (D) Quantile calibration curve
    axes[3].plot(q_grid, q_grid, ls="--", label="ideal")
    axes[3].plot(q_grid, emp, marker="o", lw=1.5, label=f"empirical (ECE={ece:.3f})")
    axes[3].set_title(f"{model_name} — quantile calibration")
    axes[3].set_xlabel("nominal quantile q")
    axes[3].set_ylabel("empirical P(Y ≤ q̂_q)")
    axes[3].legend()

    plt.tight_layout()
    plt.show()
    # Release the figure once it has been rendered so figures do not pile up
    # in memory across models.
    plt.close(fig)

# One row per model, best (highest) mean log predictive density first.
results_df = pd.DataFrame(rows).sort_values(["PPLL_mean"], ascending=False)
print(results_df)
