In [None]:
import sys, os; sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__) if '__file__' in globals() else os.getcwd(), '..')))
#import os; os.chdir(os.path.dirname(os.getcwd()))
from utils.model_loader import get_model_fits
import numpy as np
import pandas as pd
import re
from sklearn.metrics import mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
import numpy as np

# --- experiment configuration -------------------------------------------
n = 200               # number of samples
pin = 100             # observed input dimension
d = 8                 # latent dimension
H_star = 20           # number of post-activation features
r = 5                 # number of "active" features
active_scale = 1.5    # row scale applied to the first r rows of B
inactive_scale = 0.1  # row scale applied to the remaining rows of B
x_noise = 0.05        # std of the additive noise on X
y_noise = 0.3         # std of the additive noise on y
rng = np.random.default_rng(42)

# NOTE: the order of the rng draws below is part of the result; do not reorder.

# Orthonormal embedding A (pin x d) from a reduced QR, so that A^T A = I_d.
A_rand = rng.standard_normal((pin, d))
A, _ = np.linalg.qr(A_rand)

# Latent factors Z and the noisy observed inputs X.
Z = rng.standard_normal((n, d))
input_noise = rng.standard_normal((n, pin))
X = Z @ A.T + x_noise * input_noise

# Feature weights B (H_star x d): the first r rows are replaced by an
# orthonormal set scaled by active_scale, the rest are shrunk by inactive_scale.
B = rng.standard_normal((H_star, d))
Qb, _ = np.linalg.qr(B[:r, :].T)
B[:r] = active_scale * Qb[:, :r].T
B[r:] *= inactive_scale

# Post-activation feature map.
H = np.tanh(Z @ B.T)

# Output weights: random on the first r coordinates, zero elsewhere.
w_star = np.concatenate([rng.standard_normal(r), np.zeros(H_star - r)])

# Noisy targets.
target_noise = rng.standard_normal(n)
y = H @ w_star + y_noise * target_noise

# Random 80/20 split of the row indices.
perm = rng.permutation(n)
n_tr = int(0.8 * n)
tr_idx, va_idx = np.split(perm, [n_tr])

X_train = X[tr_idx]
y_train = y[tr_idx]
X_test = X[va_idx]
y_test = y[va_idx]

# Quick shapes check (displayed as the cell output).
(X_train.shape, X_test.shape, y_train.shape, y_test.shape, H.shape)


In [None]:
import numpy as np

def effective_rank(eigs):
    """Return the effective rank exp(H(p)) of a spectrum.

    The eigenvalues are normalised to a probability vector ``p`` and the
    exponential of its Shannon entropy is returned.

    Parameters
    ----------
    eigs : array-like
        Eigenvalues of a (nominally positive semi-definite) matrix. Any
        array-like is accepted.

    Returns
    -------
    float
        ``exp(-sum(p * log(p)))`` with small 1e-12 guards against division
        by zero and ``log(0)``.
    """
    # Eigen-solvers can return tiny negative values for PSD matrices because of
    # round-off; left as-is they make log(p) NaN, so clamp them to zero first.
    eigs = np.clip(np.asarray(eigs, dtype=float), 0.0, None)
    p = eigs / (eigs.sum() + 1e-12)
    h = -(p * np.log(p + 1e-12)).sum()
    return float(np.exp(h))

# Empirical covariance of the centred feature matrix H (normalised by the
# number of rows) and its eigendecomposition, sorted by decreasing eigenvalue.
Hc = H - np.mean(H, axis=0, keepdims=True)
Sigma_H = (Hc.T @ Hc) / len(Hc)
eigvals, U = np.linalg.eigh(Sigma_H)
order = np.argsort(eigvals)[::-1]   # eigh returns ascending order; flip it
eigvals = eigvals[order]
U = U[:, order]

def align_geom(w, U, k):
    """Fraction of the squared norm of ``w`` captured by the first ``k`` columns of ``U``.

    Computes ``||U[:, :k]^T w||^2 / (||w||^2 + 1e-12)`` and returns it as a
    Python float. The 1e-12 in the denominator avoids division by zero when
    ``w`` is the zero vector.
    """
    projected = U[:, :k].T @ w
    captured = np.linalg.norm(projected) ** 2
    total = np.linalg.norm(w) ** 2 + 1e-12
    return float(captured / total)

# Text summary of the spectrum of Sigma_H and of how w_star aligns with it.
print("Top-10 eigenvalues:", np.round(eigvals[:10], 3))
top5_fraction = eigvals[:5].sum() / eigvals.sum()
print("Top-5 variance fraction:", float(top5_fraction))
print("Effective rank (exp entropy):", effective_rank(eigvals))
for k in (1, 2, 3, 5, 10):
    alignment_k = align_geom(w_star, U, k)
    print(f"Alignment@{k}:", round(alignment_k, 3))

# Scree plot of the (descending) eigenvalues.
import matplotlib.pyplot as plt
fig, ax = plt.subplots()
component_index = np.arange(1, len(eigvals) + 1)
ax.plot(component_index, eigvals, marker='o')
ax.set_xlabel('Component index')
ax.set_ylabel('Eigenvalue of Sigma_H')
ax.set_title('Post-activation spectrum (H)')
plt.show()



In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Ordinary least-squares baseline on the raw inputs X, scored on the held-out split.
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
test_mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(test_mse)
print(f"RMSE={rmse:.3f}, R²={r2:.3f}")

In [5]:
def run_regression_model_local(model_name, config_name, X_train, X_test, y_train, y_test, args):
    """Compile and sample one Stan model, then copy its chain CSVs to the output directory.

    Parameters
    ----------
    model_name : str
        Base name of the Stan file; the model is read from
        ``bnn_prior_models/{model_name}.stan`` (relative to the current
        working directory).
    config_name : str
        Passed through to ``save_metadata``.
    X_train, X_test, y_train :
        Passed to ``make_stan_data`` to build the Stan data dictionary.
    y_test :
        Accepted for signature compatibility; not used in this function.
    args : namespace-like
        Must provide ``samples``, ``burnin_samples``, ``data_config``,
        ``model_output_dir`` and (when ``data_config == "uci"``)
        ``standardize``. An optional ``seed`` attribute seeds both NumPy's
        global RNG and the CmdStan sampler. ``args.num_classes`` is set to 1
        as a side effect.

    Side effects
    ------------
    Creates the output directory, calls ``save_metadata`` on it, and copies
    each chain CSV produced by CmdStan to ``chain_{i}.csv`` (1-based).
    """
    from cmdstanpy import CmdStanModel
    from utils.stan_data_generator import make_stan_data
    from utils.io_helpers import save_metadata
    import os, shutil
    import numpy as np

    # Seed NumPy's global RNG if a seed is provided.
    seed = getattr(args, 'seed', None)
    if seed is not None:
        np.random.seed(seed)

    task = "prior"
    args.num_classes = 1  # Still needed

    stan_data = make_stan_data(model_name, task, X_train, y_train, X_test, args)

    model_path = f"bnn_prior_models/{model_name}.stan"
    # NOTE(review): force_compile=True presumably recompiles on every call — confirm this is intended.
    model = CmdStanModel(stan_file=model_path, force_compile=True)

    fit = model.sample(
        data=stan_data,
        chains=4,
        iter_sampling=args.samples,
        iter_warmup=args.burnin_samples,
        adapt_delta=0.8,
        parallel_chains=4,
        show_console=False,
        # Seeding NumPy alone does not make the Stan draws reproducible;
        # the sampler has its own RNG, so forward the seed (None = random).
        seed=seed,
        #max_treedepth = 12,
    )

    # Only standardized "uci" runs get their own sub-directory; every other
    # configuration writes straight into args.model_output_dir.
    output_dir = os.path.join(args.model_output_dir)
    if args.data_config == "uci" and args.standardize:
        output_dir = os.path.join(output_dir, "standardized")
    os.makedirs(output_dir, exist_ok=True)
    save_metadata(output_dir, args, config_name)

    # Copy the per-chain CSV files to stable names: chain_1.csv, chain_2.csv, ...
    for i, path in enumerate(fit.runset.csv_files, start=1):
        shutil.copy(path, os.path.join(output_dir, f"chain_{i}.csv"))

    print(f"[✓] Saved results to: {output_dir}")


In [None]:
import argparse
from cmdstanpy import set_cmdstan_path

import os

# Set the CmdStan path. A CMDSTAN environment variable, when present, takes
# precedence so the notebook is not tied to one machine; otherwise fall back
# to the original local installation path.
set_cmdstan_path(
    os.environ.get("CMDSTAN", "/Users/augustarnstad/.cmdstan/cmdstan-2.36.0")
)
run_regression_model_local(
    model_name="dirichlet_student_t",
    config_name="latent_model",
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    args=argparse.Namespace(
        N=X_train.shape[0],
        p=X_train.shape[1],
        sigma=None,
        data="",
        standardize=False,
        test_shift=None,
        model="dirichlet_student_t",
        H=16,
        L=1,
        config="Latent",
        seed=1,
        data_config="realworld",
        model_output_dir="results/ridgeless/alignment/priors/dirichlet_student_t_tanh",
        burnin_samples=1000,
        samples=1000,
    )
)

In [None]:

# Fit the remaining priors with identical settings. The calls differ only in
# the model name, which also determines the `model` field and the output
# directory, so they are generated in a loop (same order as before).
for prior_name in ("dirichlet_horseshoe", "regularized_horseshoe", "gaussian"):
    run_regression_model_local(
        model_name=prior_name,
        config_name="latent_model",
        X_train=X_train,
        X_test=X_test,
        y_train=y_train,
        y_test=y_test,
        args=argparse.Namespace(
            N=X_train.shape[0],
            p=X_train.shape[1],
            sigma=None,
            data="",
            standardize=False,
            test_shift=None,
            model=prior_name,
            H=16,
            L=1,
            config="Latent",
            seed=1,
            data_config="realworld",
            model_output_dir=f"results/ridgeless/alignment/priors/{prior_name}_tanh",
            burnin_samples=1000,
            samples=1000,
        ),
    )

In [None]:
# Visualizing why allocating larger ξ to larger |β| reduces the negative log-posterior
# Focus on p=2 (so ξ2 = 1 − ξ1). Includes:
# 1) Objective vs ξ1 for α ∈ {0.5, 1.5, 3.0} and several (β1, β2) pairs.
# 2) Marginal gain plot: β1^2/ξ1^2 − β2^2/(1−ξ1)^2 (sign tells which way to move mass).

import numpy as np
import matplotlib.pyplot as plt

# ξ-part of the negative log-posterior for p=2 at fixed betas and α
def nll_xi1(xi1, beta1, beta2, tau=1.0, alpha1=1.5, alpha2=1.5, eps=1e-12):
    """ξ-dependent part of the negative log-posterior for p = 2 (ξ2 = 1 − ξ1).

    Model: β_i | ξ ~ N(0, τ² ξ_i) and ξ ~ Dirichlet(α1, α2). Up to constants
    in ξ the negative log-posterior is

        0.5 · Σ β_i² / (τ² ξ_i)  +  Σ (3/2 − α_i) · log ξ_i,

    where the 3/2 − α_i coefficient is the sum of 1/2 (Gaussian normalisation)
    and 1 − α_i (Dirichlet density).

    Parameters
    ----------
    xi1 : float or ndarray
        Mass on the first coordinate; clipped to [eps, 1 − eps].
    beta1, beta2 : float
        Coefficient values.
    tau : float
        Global scale.
    alpha1, alpha2 : float
        Dirichlet concentration parameters.
    eps : float
        Clipping margin that keeps logs and divisions finite.

    Returns
    -------
    float or ndarray
        Objective value(s), same shape as ``xi1``.
    """
    # keep ξ in (0,1) to avoid log/division issues
    xi1 = np.clip(xi1, eps, 1 - eps)
    xi2 = 1.0 - xi1
    log_xi1, log_xi2 = np.log(xi1), np.log(xi2)
    # Quadratic term from β|ξ ~ N(0, τ^2 diag(ξ)).
    quad = 0.5 * (beta1**2 / xi1 + beta2**2 / xi2) / (tau**2)
    # Gaussian normalisation: contributes 1/2 · log ξ_i.
    normal_norm = 0.5 * (log_xi1 + log_xi2)
    # Dirichlet density alone contributes (1 − α_i) · log ξ_i. Using 3/2 − α_i
    # here would count the Gaussian 1/2 · log ξ_i a second time.
    dirichlet_term = (1.0 - alpha1) * log_xi1 + (1.0 - alpha2) * log_xi2
    return quad + normal_norm + dirichlet_term  # up to constants w.r.t. ξ

# Plot configuration.
beta_pairs = [(10.0, 1.0)]#, (1.0, 1.0), (1.0, 2.0)]
alphas = [0.1]
tau = 1.0
xi1_grid = np.linspace(1e-4, 1 - 1e-4, 2000)

# Marginal gain β1²/ξ1² − β2²/(1−ξ1)² as a function of ξ1 for each β pair.
plt.figure(figsize=(8, 5))
for b1, b2 in beta_pairs:
    xi = xi1_grid
    abs_b1, abs_b2 = abs(b1), abs(b2)
    # Clip so the curve stays readable near the boundaries where it blows up.
    marg_gain = np.clip((b1**2)/(xi**2) - (b2**2)/((1 - xi)**2), -1e3, 1e3)
    plt.plot(xi, marg_gain, label=f"|β|=({abs_b1:.1f},{abs_b2:.1f})")
    # Zero crossing of the (unclipped) gain: ξ1 = |β1| / (|β1| + |β2|).
    x_star = abs_b1 / (abs_b1 + abs_b2)
    plt.axhline(0.0, linestyle="-", linewidth=1)    # zero-gain reference
    plt.axvline(x_star, linestyle="--", alpha=0.8)  # zero crossing
    plt.axvline(0.5, linestyle=":", alpha=0.8)      # equal split

# Title and axis labels are currently disabled:
# plt.title("Marginal gain of shifting mass to ξ₁ (no Dirichlet term)\n>0 ⇒ shift to ξ₁; <0 ⇒ shift to ξ₂")
# plt.xlabel("ξ₁   (with ξ₂ = 1 − ξ₁)")
# plt.ylabel("β₁²/ξ₁² − β₂²/(1−ξ₁)²")
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# Three candidate allocations on the simplex: uniform, mass on the first
# coordinates, and mass on the last coordinates.
xi_1 = np.array([0.25, 0.25, 0.25, 0.25])
xi_2 = np.array([0.75, 0.2, 0.03, 0.02])
xi_3 = np.array([0.02, 0.03, 0.2, 0.75])

beta = np.array([5, 2, 1, 0.25])


def _allocation_objective(weights):
    """Σ β_i² / ξ_i + 1.4 · Σ log ξ_i for one allocation ξ = ``weights``."""
    # NOTE(review): 1.4 presumably equals (3/2 − α) with α = 0.1 — confirm.
    return np.sum((beta**2)/(weights)) + np.sum(1.4*np.log(weights))


for _label, _weights in (("First", xi_1), ("Second", xi_2), ("Third", xi_3)):
    print(f"{_label}: ", _allocation_objective(_weights), "\n")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Dirichlet–Gaussian objective on the 2-simplex (p=3), up to constants
def F(xi1, xi2, beta, tau=1.0, alpha=1.2):  # alpha != 1.5 so the log term is active
    """Evaluate 0.5·Σ β_i²/(τ² ξ_i) + (3/2 − α)·Σ log ξ_i at ξ = (xi1, xi2, 1 − xi1 − xi2).

    ``xi1`` and ``xi2`` are scalars; ``beta`` is a length-3 sequence.
    Returns NaN when the implied third coordinate is not strictly positive.
    """
    xi3 = 1.0 - xi1 - xi2
    if xi3 <= 0:
        return np.nan
    xi = np.array([xi1, xi2, xi3])
    beta_sq = np.array(beta) ** 2
    quad = 0.5 * np.sum(beta_sq / (tau**2 * xi))
    logterm = (1.5 - alpha) * np.sum(np.log(xi))
    return quad + logterm

# Regular grid over the unit square; only points with ξ1 + ξ2 < 1 are evaluated.
n = 500
x = np.linspace(1e-4, 1 - 1e-4, n)
xi1, xi2 = np.meshgrid(x, x)
mask = (xi1 + xi2 < 1.0)

beta = [1.0, 1.0, 1.0]      # squared coefficients drive where the minimum is
alpha = 0.5                # < 1.5 encourages concentration (stronger coupling)
tau   = 1.0

# Evaluate F on the masked points (row-major order); everything else stays NaN.
Z = np.full_like(xi1, np.nan, dtype=float)
Z[mask] = [
    F(a, b, beta, tau=tau, alpha=alpha)
    for a, b in zip(xi1[mask], xi2[mask])
]

# Grid location of the smallest finite value.
k = np.nanargmin(Z)
i_min, j_min = np.unravel_index(k, Z.shape)
x1_min = float(xi1[i_min, j_min])
x2_min = float(xi2[i_min, j_min])

# Clip the colour scale so the interior structure is visible despite the
# very large values near the boundary.
vmin = np.nanpercentile(Z, 1)
vmax = np.nanpercentile(Z, 95)

fig, ax = plt.subplots(figsize=(6, 5))
im = ax.imshow(Z, origin='lower', extent=[0, 1, 0, 1], cmap='viridis',
               vmin=vmin, vmax=vmax, aspect='equal')
fig.colorbar(im, ax=ax, label="Negative log-prior  F(ξ)  (clipped)")

# Simplex boundary ξ1 + ξ2 = 1.
t = np.linspace(0, 1, 400)
ax.plot(t, 1 - t, 'w--', lw=1)

# Mark the grid argmin.
ax.scatter([x1_min], [x2_min], c='red', s=40, marker='*', zorder=3, label='argmin')

ax.set_title(f"Dirichlet–Gaussian over simplex\nβ={tuple(beta)},  α={alpha}")
ax.set_xlabel("ξ₁")
ax.set_ylabel("ξ₂")
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.legend(loc='lower left')
fig.tight_layout()
plt.show()
