In [None]:
import sys, os; sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__) if '__file__' in globals() else os.getcwd(), '..')))
from utils.model_loader import get_model_fits
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# --- Paths --------------------------------------------------------------
# Dataset folder and the result folders of the single-layer regression fits.
data_dir = "datasets/abalone"
results_dir_relu = "results/regression/single_layer/relu/abalone"
results_dir_tanh = "results/regression/single_layer/tanh/abalone"

# --- Model names, one list per activation -------------------------------
model_names_relu = [
    "Gaussian",
    "Regularized Horseshoe",
    "Dirichlet Horseshoe",
    "Dirichlet Student T",
]
model_names_tanh = [
    "Gaussian tanh",
    "Regularized Horseshoe tanh",
    "Dirichlet Horseshoe tanh",
    "Dirichlet Student T tanh",
]

# Name of the data configuration whose fits are loaded.
full_config_path = "abalone_N3341_p8"

# Only the tanh fits are loaded in this notebook; the ReLU fits would be
# loaded the same way with results_dir_relu / model_names_relu.
tanh_fit = get_model_fits(
    config=full_config_path,
    results_dir=results_dir_tanh,
    models=model_names_tanh,
    include_prior=False,
)

In [3]:
from utils.generate_data import load_abalone_regression_data
# Load the full (frac=1.0), unstandardized Abalone train/test split.
X, X_test, y, y_test = load_abalone_regression_data(standardized=False, frac=1.0)

# Features: coerce to plain float64 NumPy arrays.
X, X_test = (np.asarray(part, dtype=float) for part in (X, X_test))

# Targets may arrive as an (n, 1) DataFrame/array; flatten them to shape (n,).
y, y_test = (np.asarray(part, dtype=float).reshape(-1) for part in (y, y_test))


In [4]:
import numpy as np
from numpy.linalg import cholesky, solve
from utils.kappa_matrix import shrinkage_matrix_stable

def build_operators_from_PS(P, S):
    """
    Build the whitened matrix G and two shrinkage operators for every draw.

    Parameters
    ----------
    P, S : arrays of shape (n_draws, d, d)
        Per-draw matrices. Each P[s] must be symmetric positive definite (it
        is Cholesky-factorised); each S[s] is expected to be symmetric.

    Returns
    -------
    G : (n_draws, d, d)
        L^{-1} S L^{-T} with P = L L^T. This is symmetric and has the same
        eigenvalues as P^{-1} S (and as P^{-1/2} S P^{-1/2}).
    shrink_PS : (n_draws, d, d)
        I - shrinkage_matrix_stable(P, S). Intended to be (P+S)^{-1} S, which
        assumes the helper returns (P+S)^{-1} P -- TODO confirm against
        utils.kappa_matrix.
    shrink_G : (n_draws, d, d)
        (I+G)^{-1} G.

    Raises
    ------
    numpy.linalg.LinAlgError
        If some P[s] (or I + G[s]) is not positive definite.
    """
    n_draws, d, _ = P.shape
    G         = np.empty_like(P, dtype=np.float64)
    shrink_PS = np.empty_like(P, dtype=np.float64)
    shrink_G  = np.empty_like(P, dtype=np.float64)

    I = np.eye(d)

    for s in range(n_draws):
        Ps = P[s]; Ss = S[s]

        # --- G = L^{-1} S L^{-T}.
        # numpy.linalg.cholesky returns the LOWER-triangular factor (Ps = L L^T).
        # The previous code solved with L^T first and then with L, which yields
        # L^{-1} S L^{-1}: only correct (and symmetric) when P is diagonal.
        L = cholesky(Ps)
        half = solve(L, Ss)            # L^{-1} S
        Gs = solve(L, half.T).T        # (L^{-1} S^T L^{-T})^T = L^{-1} S L^{-T}
        Gs = 0.5 * (Gs + Gs.T)         # remove round-off asymmetry before the Cholesky below
        G[s] = Gs

        # --- I - R, with R supplied by the project helper
        Rs = shrinkage_matrix_stable(Ps, Ss)
        shrink_PS[s] = I - Rs

        # --- (I+G)^{-1} G via the Cholesky factor of B = I + G = LB LB^T
        B = I + Gs
        LB = cholesky(B)
        YB = solve(LB, Gs)             # LB^{-1} G
        XB = solve(LB.T, YB)           # LB^{-T} LB^{-1} G = B^{-1} G
        shrink_G[s] = XB

    return G, shrink_PS, shrink_G


def _load_operators(npz_path):
    """
    Load the "P" and "S" stacks stored in `npz_path` (as float64) and build
    their operators.

    Returns (P, S, G, shrink_PS, shrink_G), where the last three come from
    build_operators_from_PS(P, S).
    """
    # The context manager closes the underlying .npz file after the arrays are read.
    with np.load(npz_path) as dat:
        P = dat["P"].astype(np.float64)
        S = dat["S"].astype(np.float64)
    G, shrink_PS, shrink_G = build_operators_from_PS(P, S)
    return P, S, G, shrink_PS, shrink_G


# --- Dense models ---------------------------------------------------------
P_gauss, S_gauss, G_gauss, shrink_PS_gauss, shrink_G_gauss = _load_operators(
    "Abalone_matrices/Gaussian_PS.npz")
P_RHS, S_RHS, G_RHS, shrink_PS_RHS, shrink_G_RHS = _load_operators(
    "Abalone_matrices/Regularized_Horseshoe_PS.npz")
P_DHS, S_DHS, G_DHS, shrink_PS_DHS, shrink_G_DHS = _load_operators(
    "Abalone_matrices/Dirichlet_Horseshoe_PS.npz")
P_DST, S_DST, G_DST, shrink_PS_DST, shrink_G_DST = _load_operators(
    "Abalone_matrices/Dirichlet_StudentT_PS.npz")

# --- Models saved under the "sparsity_90" file names ----------------------
P_gauss_sparse, S_gauss_sparse, G_gauss_sparse, shrink_PS_gauss_sparse, shrink_G_gauss_sparse = _load_operators(
    "Abalone_matrices/Gaussian_sparsity_90_PS.npz")
P_RHS_sparse, S_RHS_sparse, G_RHS_sparse, shrink_PS_RHS_sparse, shrink_G_RHS_sparse = _load_operators(
    "Abalone_matrices/Regularized_Horseshoe_sparsity_90_PS.npz")
# The last name used to be `shrink_G_DHS`, which overwrote the dense DHS stack
# loaded above; it now gets its own `_sparse` name like the other models.
P_DHS_sparse, S_DHS_sparse, G_DHS_sparse, shrink_PS_DHS_sparse, shrink_G_DHS_sparse = _load_operators(
    "Abalone_matrices/Dirichlet_Horseshoe_sparsity_90_PS.npz")
P_DST_sparse, S_DST_sparse, G_DST_sparse, shrink_PS_DST_sparse, shrink_G_DST_sparse = _load_operators(
    "Abalone_matrices/Dirichlet_StudentT_sparsity_90_PS.npz")



In [5]:
import numpy as np

def idempotence_likeness(A, eps=1e-12):
    """
    Relative idempotence error ||A A - A||_F / (||A||_F + eps).

    The value is 0 when A @ A equals A exactly; `eps` keeps the ratio finite
    for an all-zero A.
    """
    residual = A @ A - A
    scale = np.linalg.norm(A, 'fro') + eps
    return np.linalg.norm(residual, 'fro') / scale

def svd_effective_rank(A, eps=1e-12):
    """
    Entropy-based effective rank of A from its singular values.

    Returns:
      s_sorted : singular values in descending order
      rho      : singular values normalised to sum to 1 (uniform weights when
                 their sum is <= eps, i.e. for an all-zero block)
      erank    : exp(H) with H = -sum(rho * log(rho + eps))
    """
    singular = np.linalg.svd(A, compute_uv=False)
    s_sorted = np.sort(singular)[::-1]
    total = s_sorted.sum()
    if total <= eps:
        # Degenerate block: fall back to uniform weights.
        rho = np.ones_like(s_sorted) / len(s_sorted)
    else:
        rho = s_sorted / total
    entropy = -np.sum(rho * np.log(rho + eps))
    return s_sorted, rho, float(np.exp(entropy))


In [14]:
# Number of draws, number of diagonal blocks, and block size used below.
S = 4000
blocks = 16
p = 8

# One (S, blocks) table per model and per statistic.
erank_gauss, erank_RHS, erank_DHS, erank_DST = (np.zeros((S, blocks)) for _ in range(4))
idempotent_gauss, idempotent_RHS, idempotent_DHS, idempotent_DST = (
    np.zeros((S, blocks)) for _ in range(4)
)

# (operator stack, effective-rank table, idempotence table) for each model.
_block_stat_targets = (
    (shrink_PS_gauss, erank_gauss, idempotent_gauss),
    (shrink_PS_RHS,   erank_RHS,   idempotent_RHS),
    (shrink_PS_DHS,   erank_DHS,   idempotent_DHS),
    (shrink_PS_DST,   erank_DST,   idempotent_DST),
)

for b in range(blocks):
    diag_block = slice(b*p, p + b*p)          # rows/columns of the b-th diagonal block
    for i in range(S):
        for stack, erank_table, idem_table in _block_stat_targets:
            sub = stack[i, diag_block, diag_block]
            erank_table[i, b] = svd_effective_rank(sub)[2]
            idem_table[i, b] = idempotence_likeness(sub)

In [None]:
# Mean effective rank per diagonal block, one line per model.
plt.figure()
_block_axis = np.arange(0, 16)
for _series, _tag in (
    (erank_gauss, "Gauss"),
    (erank_RHS, "RHS"),
    (erank_DHS, "DHS"),
    (erank_DST, "DST"),
):
    plt.plot(_block_axis, _series.mean(axis=0), label=_tag, marker='o')
plt.title("Effective rank of blocks")
plt.xlabel("Block")
plt.ylabel("Value")
plt.legend()
plt.show()

In [None]:
# Not the last expression of the cell, so this value is computed but never displayed.
erank_gauss.mean(axis=0)

# Mean idempotence error per diagonal block, one line per model.
plt.figure()
_block_axis = np.arange(0, 16)
for _series, _tag in (
    (idempotent_gauss, "Gauss"),
    (idempotent_RHS, "RHS"),
    (idempotent_DHS, "DHS"),
    (idempotent_DST, "DST"),
):
    plt.plot(_block_axis, _series.mean(axis=0), label=_tag, marker='o')
plt.title("Idempotence error of blocks")
plt.xlabel("Block")
plt.ylabel("Value")
plt.legend()
plt.show()

In [132]:
# =========================
# 1) BLOCK THE MATRICES
# =========================
def make_block_lists(shrink_3d, block_size=8):
    """
    Cut the diagonal blocks out of every sample of a (M, D, D) stack.

    shrink_3d: array (M, D, D); D must be divisible by block_size.
    Returns a list with H = D // block_size entries. Entry h is a list of
    length M holding the (block_size x block_size) block h of each sample,
    i.e. blocks[h][m] = shrink_3d[m, h*bs:(h+1)*bs, h*bs:(h+1)*bs].
    """
    _, D, _ = shrink_3d.shape
    assert D % block_size == 0, "D must be divisible by block_size"
    blocks = []
    for h in range(D // block_size):
        span = slice(h*block_size, (h+1)*block_size)
        blocks.append([sample[span, span] for sample in shrink_3d])
    return blocks

# Diagonal 8x8 blocks of each model's operator stack (blocks_X[h][m]).
blocks_gauss, blocks_RHS, blocks_DHS, blocks_DST = (
    make_block_lists(stack, block_size=8)
    for stack in (shrink_PS_gauss, shrink_PS_RHS, shrink_PS_DHS, shrink_PS_DST)
)

In [None]:
import numpy as np
import matplotlib.pyplot as plt


# ===========================================
# 2) MEAN-REF SUBSPACE + WITHIN-PRIOR COSINES
# ===========================================
def topk_left_singular_vectors_meanref(blocks_for_one_diag_block, k=None, energy=0.90, symmetrize=True):
    """
    Leading left singular vectors of the sample-mean block.

    blocks_for_one_diag_block: list of (p x p) arrays, one per sample, all
    belonging to the same diagonal block.
    k: number of directions to keep. When None it is the smallest count whose
    cumulative share of the singular-value sum reaches `energy` (1 if the sum
    is <= 1e-12).
    symmetrize: replace the mean block by 0.5 * (Abar + Abar.T) before the SVD.

    Returns (U_ref_k, s_ref_k) with U_ref_k of shape (p, k).
    """
    mean_block = np.stack(blocks_for_one_diag_block, axis=0).mean(axis=0)
    if symmetrize:
        mean_block = 0.5 * (mean_block + mean_block.T)
    U_ref, s_ref, _ = np.linalg.svd(mean_block, full_matrices=False)

    if k is None:
        total = s_ref.sum()
        if total <= 1e-12:
            k = 1
        else:
            share = np.cumsum(s_ref) / total
            k = int(np.searchsorted(share, energy) + 1)

    return U_ref[:, :k], s_ref[:k]


def within_prior_overlap_meanref(blocks_for_one_diag_block, k=None, energy=0.90, symmetrize=True):
    """
    Principal-angle cosines between each sample's leading subspace and the
    reference subspace of the MEAN block.

    The reference basis comes from topk_left_singular_vectors_meanref; every
    sample contributes its first K left singular vectors, K being the width of
    that reference basis.
    Returns: array of shape (M, K) -- M samples, cosines sorted descending.
    """
    U_ref, _ = topk_left_singular_vectors_meanref(
        blocks_for_one_diag_block, k=k, energy=energy, symmetrize=symmetrize
    )
    K = U_ref.shape[1]

    def _sample_cosines(A):
        if symmetrize:
            A = 0.5 * (A + A.T)
        U = np.linalg.svd(A, full_matrices=False)[0]
        # Singular values of U_ref^T U_k are the cosines of the principal angles.
        sig = np.linalg.svd(U_ref.T @ U[:, :K], compute_uv=False)
        return np.sort(sig)[::-1]

    return np.array([_sample_cosines(A) for A in blocks_for_one_diag_block])


# ===========================================
# 3) PLOT: COMPARE FOUR MODELS FOR ONE BLOCK
# ===========================================
def plot_within_strip_four_models_for_block(
    blocks_gauss, blocks_RHS, blocks_DHS, blocks_DST,
    block_index=0, block_size=8, energy=0.90, symmetrize=True, k=None,
    labels=('Gaussian', 'RHS', 'Dirichlet-HS', 'Dirichlet-Student-t')
):
    """
    Strip plot of sample-wise principal-angle cosines for ONE diagonal block,
    overlaying FOUR models.

    Each blocks_* argument is indexed as blocks_*[block_index] and passed to
    within_prior_overlap_meanref. All models are truncated to the smallest
    number of directions among them. Points of a model share one colour and
    are shifted and randomly jittered (np.random.randn) along x so the models
    do not overlap. `block_size` is accepted but not used.
    """
    # Within-prior cosines (mean-ref) of the chosen block, per model.
    per_model = [
        within_prior_overlap_meanref(model_blocks[block_index], k=k, energy=energy, symmetrize=symmetrize)
        for model_blocks in (blocks_gauss, blocks_RHS, blocks_DHS, blocks_DST)
    ]

    # Keep a common number of directions across the four models.
    K = min(C.shape[1] for C in per_model)
    per_model = tuple(C[:, :K] for C in per_model)

    plt.figure(figsize=(7,4))
    jitter = 0.05
    # Horizontal offset of each model inside one direction's strip.
    offsets = (-1.5*jitter, -0.5*jitter, +0.5*jitter, +1.5*jitter)

    palette = ('tab:blue', 'tab:orange', 'tab:green', 'tab:red')
    model_colors = {labels[pos]: palette[pos] for pos in range(4)}

    for cosines, shift, name in zip(per_model, offsets, labels):
        n_samples, n_dirs = cosines.shape
        colour = model_colors[name]      # one colour for all directions of a model
        for direction in range(n_dirs):
            x = np.full(n_samples, direction+1) + shift + 0.03*np.random.randn(n_samples)
            plt.plot(x, cosines[:, direction], '.', alpha=0.35, markersize=6,
                     color=colour, label=name if direction == 0 else None)

    plt.axhline(0.9, lw=1, alpha=0.2)
    plt.xticks(range(1, K+1))
    plt.ylim(0, 1.02)
    plt.xlabel('Principal direction index (top-k of mean-ref subspace)')
    plt.ylabel('Cosine overlap to mean-ref')
    plt.title(f'Within-prior principal-angle cosines — block {block_index}')
    plt.legend(ncol=2, frameon=False)
    plt.tight_layout()
    plt.show()


# =========================
# Example usage (block 6)
# =========================
# All other arguments are left at the defaults declared by the function.
plot_within_strip_four_models_for_block(
    blocks_gauss=blocks_gauss,
    blocks_RHS=blocks_RHS,
    blocks_DHS=blocks_DHS,
    blocks_DST=blocks_DST,
    block_index=6,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# ---------- helpers ----------
def make_block_lists(shrink_3d, block_size=8):
    """
    Cut the diagonal blocks out of every sample of a (M, D, D) stack.

    NOTE: this re-defines (and therefore shadows) the function of the same
    name from an earlier cell. It now enforces the same divisibility check,
    so a D that is not a multiple of block_size fails loudly instead of
    silently dropping the trailing rows/columns.

    Returns a list of H = D // block_size lists; blocks[h][m] is the
    (block_size x block_size) diagonal block h of sample m.
    """
    M, D, _ = shrink_3d.shape
    assert D % block_size == 0, "D must be divisible by block_size"
    H = D // block_size
    return [[shrink_3d[m, h*block_size:(h+1)*block_size, h*block_size:(h+1)*block_size]
             for m in range(M)] for h in range(H)]

def top_r_mean_ref(blocks_one, r, symmetrize=True):
    """
    First r left singular vectors and singular values of the sample-mean block.

    blocks_one: list of equally shaped square arrays (one per sample).
    symmetrize: use 0.5 * (Abar + Abar.T) instead of the raw mean.
    Returns (U[:, :r], s[:r]).
    """
    mean_block = np.stack(blocks_one, axis=0).mean(axis=0)
    if symmetrize:
        mean_block = 0.5*(mean_block + mean_block.T)
    left, sing, _ = np.linalg.svd(mean_block, full_matrices=False)
    return left[:, :r], sing[:r]

def within_cosines_meanref(blocks_one, r, K=None, energy=0.90, symmetrize=True):
    """
    Top-r principal-angle cosines between each sample's subspace and the
    rank-r reference subspace from top_r_mean_ref.

    The sample subspace uses K_eff left singular vectors: max(r, K) when K is
    given, otherwise max(r, smallest count whose cumulative singular-value
    share reaches `energy`).
    Returns an array with one row of r cosines per sample.
    """
    Uref = top_r_mean_ref(blocks_one, r, symmetrize=symmetrize)[0]
    rows = []
    for block in blocks_one:
        if symmetrize:
            block = 0.5*(block + block.T)
        U, s, _ = np.linalg.svd(block, full_matrices=False)
        if K is not None:
            K_eff = max(r, K)
        else:
            share = np.cumsum(s)/max(s.sum(), 1e-12)
            K_eff = max(r, int(np.searchsorted(share, energy) + 1))
        sig = np.linalg.svd(Uref.T @ U[:, :K_eff], compute_uv=False)
        rows.append(sig[:r])      # svd returns them in descending order
    return np.array(rows)

# ---------- compute ALL blocks -> heatmaps ----------
def model_heatmap(shrink_3d, r=4, block_size=8, energy=0.90, symmetrize=True, agg='median'):
    """
    Per-block summary of the within-model cosines for one operator stack.

    For every diagonal block the cosines from within_cosines_meanref are
    aggregated over samples with the median (agg='median') or, for any other
    value of `agg`, the mean.
    Returns an array of shape (num_blocks, r).
    """
    reducer = np.median if agg == 'median' else np.mean
    rows = [
        reducer(
            within_cosines_meanref(one_block, r=r, K=None, energy=energy, symmetrize=symmetrize),
            axis=0,
        )
        for one_block in make_block_lists(shrink_3d, block_size)
    ]
    return np.array(rows)

def plot_four_heatmaps(gauss, rhs, dhs, dst, r=4, titles=('Gaussian','RHS','Dirichlet-HS','Dirichlet-Student-t')):
    """
    Draw a 2x2 grid of model_heatmap(...) images, one per operator stack.

    All panels share axes and the colour range [0, 1]; a single colourbar is
    attached to the whole grid.
    """
    grids = [model_heatmap(stack, r=r) for stack in (gauss, rhs, dhs, dst)]
    fig, axes = plt.subplots(2, 2, figsize=(10,6), sharex=True, sharey=True)
    axes = axes.ravel()
    for panel, grid, heading in zip(axes, grids, titles):
        im = panel.imshow(grid, vmin=0, vmax=1, aspect='auto', cmap='viridis')
        panel.set_title(heading)
        panel.set_xlabel('Principal direction (1..r)')
        panel.set_ylabel('Block index')
    # `im` is the image of the last panel; all panels use the same colour scale.
    fig.colorbar(im, ax=axes.tolist(), fraction=0.02, pad=0.02)
    fig.suptitle('Within-prior cosine (to mean-ref) — median over samples', y=0.99)
    plt.tight_layout()
    plt.show()

# ---------- usage ----------
# r is the number of principal directions summarised per block.
plot_four_heatmaps(
    gauss=shrink_PS_gauss,
    rhs=shrink_PS_RHS,
    dhs=shrink_PS_DHS,
    dst=shrink_PS_DST,
    r=4,
)


In [None]:
import numpy as np

def shrinkage_feature_correlation(X, A):
    """
    Compare the off-diagonal pattern of a shrinkage block with the feature
    correlation pattern.

    X: (n, p) feature matrix
    A: (p, p) shrinkage matrix (single block)

    Returns (r, corr_X): r is the Pearson correlation between |corr(X)| and
    |A| over the strict upper triangle, corr_X is the (p, p) feature
    correlation matrix.
    """
    corr_X = np.corrcoef(X, rowvar=False)

    # Strict upper triangle: every feature pair once, diagonal excluded.
    upper = np.triu_indices_from(corr_X, k=1)
    feature_pattern = np.abs(corr_X[upper])
    shrink_pattern = np.abs(A[upper])

    r = np.corrcoef(feature_pattern, shrink_pattern)[0, 1]
    return r, corr_X

def _mean_alignment_per_block(model_blocks):
    """
    For each diagonal block, average shrinkage_feature_correlation(X, A)[0]
    over all draws A of that block. Returns one value per block.
    """
    return np.array([
        np.mean([shrinkage_feature_correlation(X, A)[0] for A in draws])
        for draws in model_blocks
    ])


# Each result is sized by its own model's block list. The earlier version
# allocated e.g. means_gauss from len(blocks_RHS) and means_RHS from
# len(blocks_DHS), which only worked because all lists have equal length.
means_gauss = _mean_alignment_per_block(blocks_gauss)
means_RHS   = _mean_alignment_per_block(blocks_RHS)
means_DHS   = _mean_alignment_per_block(blocks_DHS)
means_DST   = _mean_alignment_per_block(blocks_DST)
    



In [None]:
# Mean shrinkage/feature-correlation alignment per block, one line per model.
plt.figure()
_block_axis = np.arange(0, 16)
for _series, _tag in (
    (means_gauss, "Gauss"),
    (means_RHS, "RHS"),
    (means_DHS, "DHS"),
    (means_DST, "DST"),
):
    plt.plot(_block_axis, _series, label=_tag, marker='o')
plt.title("Mean alignment of blockwise shrinkage to correlation of features")
plt.xlabel("Block")
plt.ylabel("Value")
plt.legend()
plt.show()

In [147]:
def frobenius_correlation(A, B):
    """
    Correlation of two equally shaped matrices under the Frobenius inner
    product, after subtracting each matrix's overall mean entry:
    <A_c, B_c>_F / (||A_c||_F * ||B_c||_F).
    """
    centred_A = A - A.mean()
    centred_B = B - B.mean()
    inner = np.sum(centred_A * centred_B)
    norms = np.linalg.norm(centred_A, 'fro') * np.linalg.norm(centred_B, 'fro')
    return inner / norms


In [None]:
# Number of diagonal blocks, block size, and number of draws used below.
blocks = 16
p = 8
S = 4000

# One (S, blocks) table of Frobenius correlations per model.
rho_F_gauss, rho_F_RHS, rho_F_DHS, rho_F_DST = (np.zeros((S, blocks)) for _ in range(4))
corr_X = np.corrcoef(X, rowvar=False)

_rho_targets = (
    (rho_F_gauss, shrink_PS_gauss),
    (rho_F_RHS,   shrink_PS_RHS),
    (rho_F_DHS,   shrink_PS_DHS),
    (rho_F_DST,   shrink_PS_DST),
)

for b in range(blocks):
    diag_block = slice(b*p, p + b*p)          # rows/columns of the b-th diagonal block
    for i in range(S):
        for table, stack in _rho_targets:
            table[i, b] = frobenius_correlation(stack[i, diag_block, diag_block], corr_X)
        

In [None]:
# Mean Frobenius correlation with the feature correlation matrix, per block.
plt.figure()
_block_axis = np.arange(0, 16)
for _series, _tag in (
    (rho_F_gauss, "Gauss"),
    (rho_F_RHS, "RHS"),
    (rho_F_DHS, "DHS"),
    (rho_F_DST, "DST"),
):
    plt.plot(_block_axis, _series.mean(axis=0), label=_tag, marker='o')
plt.title("Mean alignment of blockwise shrinkage to correlation of features")
plt.xlabel("Block")
plt.ylabel("Value")
plt.legend()
plt.show()

In [None]:
# Spot check: Frobenius correlation of ONE block (draw 1, first 8x8 block)
# with the feature correlation matrix.
# The scalars get their own names: they used to be stored in rho_F_RHS /
# rho_F_DHS, which overwrote the (S, blocks) tables of the same name and broke
# any later re-run of the plot cell that calls .mean(axis=0) on them.
corr_X = np.corrcoef(X, rowvar=False)
rho_F_RHS_single = frobenius_correlation(shrink_PS_RHS[1, 0:8, 0:8], corr_X)
rho_F_DHS_single = frobenius_correlation(shrink_PS_DHS[1, 0:8, 0:8], corr_X)
print(f"Frobenius correlation between RHS shrinkage and feature corr: {rho_F_RHS_single:.3f}")
print(f"Frobenius correlation between DHS shrinkage and feature corr: {rho_F_DHS_single:.3f}")


In [6]:
from utils.kappa_matrix import visualize_models

# Draw-averaged matrices for each model, in the order Gauss, RHS, DHS, DST,
# together with the panel titles used when they are visualised.
matrices_S = [np.mean(stack, axis=0) for stack in (S_gauss, S_RHS, S_DHS, S_DST)]
names_S = ["S (Gauss)", "S (RHS)", "S (DHS)", "S (DST)"]

matrices_G = [np.mean(stack, axis=0) for stack in (G_gauss, G_RHS, G_DHS, G_DST)]
names_G = ["G (Gauss)", "G (RHS)", "G (DHS)", "G (DST)"]

matrices_shrink = [
    np.mean(stack, axis=0)
    for stack in (shrink_G_gauss, shrink_G_RHS, shrink_G_DHS, shrink_G_DST)
]
names_shrink = ["(I+G)^{-1}G (Gauss)", "(I+G)^{-1}G (RHS)", "(I+G)^{-1}G (DHS)", "(I+G)^{-1}G (DST)"]

matrices_operator = [
    np.mean(stack, axis=0)
    for stack in (shrink_PS_gauss, shrink_PS_RHS, shrink_PS_DHS, shrink_PS_DST)
]
names_operator = ["(P+S)^{-1}S (Gauss)", "(P+S)^{-1}S (RHS)", "(P+S)^{-1}S (DHS)", "(P+S)^{-1}S (DST)"]

In [None]:
# Mean S matrices of the four models.
visualize_models(
    matrices_S,
    names_S,
    H=16,
    p=8,
    use_abs=False,
)

In [None]:
# Mean G matrices of the four models.
visualize_models(
    matrices_G,
    names_G,
    H=16,
    p=8,
    use_abs=False,
)


In [None]:
# Mean (I+G)^{-1}G matrices of the four models.
visualize_models(
    matrices_shrink,
    names_shrink,
    H=16,
    p=8,
    use_abs=False,
)

In [None]:
# Mean shrink_PS operators of the four models.
visualize_models(
    matrices_operator,
    names_operator,
    H=16,
    p=8,
    use_abs=False,
)

In [7]:
# --- Traces as distributions (df_eff = tr(R) vs total shrinkage = tr(I-R)) ---
import matplotlib.pyplot as plt

# Per-draw trace of the (I+G)^{-1}G stacks (used below as effective dof).
tr_R_gauss, tr_R_RHS, tr_R_DHS, tr_R_DST = (
    np.trace(stack, axis1=1, axis2=2)
    for stack in (shrink_G_gauss, shrink_G_RHS, shrink_G_DHS, shrink_G_DST)
)

# Per-draw trace of the shrink_PS stacks.
tr_SPinvS_gauss, tr_SPinvS_RHS, tr_SPinvS_DHS, tr_SPinvS_DST = (
    np.trace(stack, axis1=1, axis2=2)
    for stack in (shrink_PS_gauss, shrink_PS_RHS, shrink_PS_DHS, shrink_PS_DST)
)



In [None]:
# Histograms of the per-draw traces, all four models overlaid.
bins = 40
_hist_tags = ("Gauss", "RHS", "DHS", "DST")

# Figure 1: trace of (I+G)^{-1}G.
plt.figure(figsize=(8,4), dpi=150)
for _values, _tag in zip((tr_R_gauss, tr_R_RHS, tr_R_DHS, tr_R_DST), _hist_tags):
    plt.hist(_values, bins=bins, alpha=0.5, label=_tag)
plt.xlabel("trace((I+G)^{-1}G)  [effective dof]")
plt.ylabel("count")
plt.legend()
plt.tight_layout()
plt.show()

# Figure 2: trace of the shrink_PS operator.
plt.figure(figsize=(5,4), dpi=150)
for _values, _tag in zip(
    (tr_SPinvS_gauss, tr_SPinvS_RHS, tr_SPinvS_DHS, tr_SPinvS_DST), _hist_tags
):
    plt.hist(_values, bins=bins, alpha=0.5, label=_tag)
plt.xlabel(r"$tr((P+S)^{-1}S)$")
plt.ylabel("Frequency")
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# --- Median eigenvalue curve (with bands) for shrink stacks ---
import matplotlib.pyplot as plt

def median_eigcurve(stack, q_lo=0.1, q_hi=0.9):
    """
    Eigenvalue curve of a stack of symmetric matrices, summarised over draws.

    stack: (D, N, N); each matrix is passed to numpy.linalg.eigvalsh, so it is
    treated as symmetric.
    Returns a dict with keys 'median', 'lo', 'hi': the median and the q_lo /
    q_hi quantiles, taken across draws, of the eigenvalues sorted in
    descending order.
    """
    # eigvalsh works on the whole stack and returns ascending eigenvalues per
    # matrix; reversing the last axis gives the descending order.
    evals = np.linalg.eigvalsh(stack)[:, ::-1]
    return {
        "median": np.median(evals, axis=0),
        "lo": np.quantile(evals, q_lo, axis=0),
        "hi": np.quantile(evals, q_hi, axis=0),
    }

# Eigenvalue curves of the (I+G)^{-1}G stacks, one entry per model.
curves = {
    label: median_eigcurve(stack)
    for label, stack in (
        ("Gauss", shrink_G_gauss),
        ("RHS", shrink_G_RHS),
        ("DHS", shrink_G_DHS),
        ("DST", shrink_G_DST),
    )
}

# 2x2 small multiples: median curve with a 10-90% band for each model.
fig, axes = plt.subplots(2, 2, figsize=(8,6), dpi=150, constrained_layout=True)
axes = axes.ravel()
for ax, (name, c) in zip(axes, curves.items()):
    x = np.arange(1, len(c["median"])+1)
    ax.plot(x, c["median"], lw=1.8, label=f"{name} median")
    ax.fill_between(x, c["lo"], c["hi"], alpha=0.25, label=f"{name} {10}-{90}%", step=None)
    ax.set(
        title=name,
        xlabel="eigenvalue rank",
        ylabel="eigenvalue of (I+G)^{-1}G",
        ylim=(0, 1),
    )
    ax.legend(loc="upper right", fontsize=8)
plt.show()


In [24]:
import numpy as np

def plot_point1_aligned(A, B, nameA="A", nameB="B",
                        H=16, p=8, use_abs=False, q_low=0.05, q_high=0.99):
    """
    Point (1): best-scale-aligned difference between two matrices.

    Builds (but does not draw) four panels and their titles:
      - Panel 1: A
      - Panel 2: c*·B  (c* = <A,B>_F / ||B||_F^2, or 0 when B is all zeros)
      - Panel 3: A - c*·B, titled with the Frobenius cosine cos_F of A and B
      - Panel 4: all-zero filler

    Returns (mats, names), two lists of length 4. H, p, use_abs, q_low and
    q_high are accepted but unused here, because the plotting call is
    disabled; pass the returned lists to visualize_models to draw them.
    """
    A = np.asarray(A, float); B = np.asarray(B, float)
    num = np.sum(A * B)
    sq_norm_B = np.sum(B * B)
    c_star = num / (sq_norm_B if sq_norm_B != 0 else 1.0)

    # Only ||B||_F carried an epsilon, so an all-zero A gave 0/0 = nan (and
    # "cos_F=nan" in the title). Report 0 in that degenerate case instead.
    norm_product = np.linalg.norm(A, "fro") * (np.linalg.norm(B, "fro") + 1e-12)
    cosF = num / norm_product if norm_product > 0 else 0.0

    mats  = [A, c_star * B, A - c_star * B, np.zeros_like(A)]
    names = [
        f"{nameA}",
        f"{nameB} scaled (c*={c_star:.3g})",
        f"Aligned diff: {nameA} − c*·{nameB}\ncos_F={cosF:.3f}",
        "(unused)"
    ]
    return mats, names

def plot_point2_unit_energy(A, B, nameA="A", nameB="B",
                            H=16, p=8, use_abs=False, q_low=0.05, q_high=0.99):
    """
    Point (2): Frobenius-normalised (unit-energy) comparison of two matrices.

    Passes four panels to visualize_models:
      - Panel 1: A / ||A||_F
      - Panel 2: B / ||B||_F
      - Panel 3: their difference
      - Panel 4: all-zero filler
    A 1e-12 term in each denominator avoids division by zero. Returns None.
    """
    A = np.asarray(A, float)
    B = np.asarray(B, float)
    unit_A = A / (np.linalg.norm(A, "fro") + 1e-12)
    unit_B = B / (np.linalg.norm(B, "fro") + 1e-12)

    panels = [unit_A, unit_B, unit_A - unit_B, np.zeros_like(A)]
    titles = [
        f"{nameA} / ||{nameA}||_F",
        f"{nameB} / ||{nameB}||_F",
        "Difference (unit-energy)",
        "(unused)"
    ]
    visualize_models(panels, titles, H=H, p=p, use_abs=use_abs, q_low=q_low, q_high=q_high)


In [32]:
# Aligned difference of the mean DHS operator against the mean RHS operator.
mats_DHS_v_RHS, names_DHS_v_RHS = plot_point1_aligned(
    np.mean(shrink_PS_DHS, axis=0),
    np.mean(shrink_PS_RHS, axis=0),
    "DHS",
    "RHS",
)

In [33]:
# Aligned difference of the mean DHS operator against the mean Gaussian operator.
mats_DHS_v_gauss, names_DHS_v_gauss = plot_point1_aligned(
    np.mean(shrink_PS_DHS, axis=0),
    np.mean(shrink_PS_gauss, axis=0),
    "DHS",
    "Gauss",
)

In [34]:
# Aligned difference of the mean DST operator against the mean RHS operator.
mats_DST_v_RHS, names_DST_v_RHS = plot_point1_aligned(
    np.mean(shrink_PS_DST, axis=0),
    np.mean(shrink_PS_RHS, axis=0),
    "DST",
    "RHS",
)

In [35]:
# Aligned difference of the mean DST operator against the mean Gaussian operator.
mats_DST_v_gauss, names_DST_v_gauss = plot_point1_aligned(
    np.mean(shrink_PS_DST, axis=0),
    np.mean(shrink_PS_gauss, axis=0),
    "DST",
    "Gauss",
)

In [None]:
# Collect panel 3 (the aligned difference, index 2) of each pairwise comparison.
_aligned_pairs = (
    (mats_DHS_v_RHS, names_DHS_v_RHS),
    (mats_DHS_v_gauss, names_DHS_v_gauss),
    (mats_DST_v_RHS, names_DST_v_RHS),
    (mats_DST_v_gauss, names_DST_v_gauss),
)
mats_combined = [pair_mats[2] for pair_mats, _ in _aligned_pairs]
names_combined = [pair_names[2] for _, pair_names in _aligned_pairs]

visualize_models(mats_combined, names_combined, H=16, p=8, use_abs=False, q_low=0.05, q_high=0.99)

In [None]:
# Draw-averaged shrink_PS operator of each model, shown side by side.
mats = [
    np.mean(stack, axis=0)
    for stack in (shrink_PS_gauss, shrink_PS_RHS, shrink_PS_DHS, shrink_PS_DST)
]
names = ["Gaussian", "RHS", "Dirichlet–HS", "Dirichlet–ST"]
visualize_models(
    mats,
    names,
    H=16,
    p=8,
    use_abs=False,
    q_low=0.05,
    q_high=0.99,
)


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.spatial.distance import pdist, squareform

# ------------------------------------------------------
# Assume: shrink_PS_gauss etc. each are (4000, 128, 128)
# ------------------------------------------------------

models = {
    "Gaussian": shrink_PS_gauss,
    "RHS": shrink_PS_RHS,
    "Dirichlet–HS": shrink_PS_DHS,
    "Dirichlet–ST": shrink_PS_DST,
}

# --- 1) Mean of each operator stack over draws ---
mean_ops = {label: np.mean(stack, axis=0) for label, stack in models.items()}

# --- 2) Pairwise cosine distances between the flattened mean operators ---
K_flat = np.stack([op.ravel() for op in mean_ops.values()])
dist_matrix = squareform(pdist(K_flat, metric="cosine"))

_model_labels = list(mean_ops.keys())
plt.figure(figsize=(6, 5))
sns.heatmap(
    dist_matrix,
    annot=True,
    fmt=".3f",
    xticklabels=_model_labels,
    yticklabels=_model_labels,
    cmap="mako",
    square=True,
    cbar_kws={"label": "Cosine distance"},
)
plt.title("Pairwise cosine distances between mean shrinkage operators")
plt.tight_layout()
plt.show()

# --- 3) Eigenvalue spectra across posterior draws ---
plt.figure(figsize=(7, 5))
for name, arr in models.items():
    # The shrink_PS stacks are built as I - R and are not guaranteed to be
    # symmetric, so eigvalsh (which reads only one triangle) can return values
    # that are not their eigenvalues. Use the general solver, keep the real
    # part, and sort each draw ascending as eigvalsh would have done.
    # NOTE(review): this assumes the spectra are real up to round-off --
    # confirm for the operators returned by shrinkage_matrix_stable.
    eigs_all = np.sort(np.linalg.eigvals(arr).real, axis=1)   # (draws, dim)
    eigs_mean = np.mean(eigs_all, axis=0)
    eigs_std  = np.std(eigs_all, axis=0)
    idx = np.argsort(eigs_mean)[::-1]          # plot from largest to smallest
    plt.plot(eigs_mean[idx], label=name)
    plt.fill_between(np.arange(len(eigs_mean)),
                     eigs_mean[idx] - eigs_std[idx],
                     eigs_mean[idx] + eigs_std[idx],
                     alpha=0.2)
plt.xlabel("Eigenvalue index (sorted)")
plt.ylabel("Eigenvalue magnitude")
plt.legend()
plt.title("Eigenvalue spectra of shrinkage operators")
plt.tight_layout()
plt.show()


In [8]:
import numpy as np
import matplotlib.pyplot as plt

# --- constants and block indices (adjust if your vec ordering is different)
H = 16
p = 8
BLOCKS = [slice(start, start + p) for start in range(0, H*p, p)]

def block_energy(U, blocks=BLOCKS):
    """
    Share of each column's squared entries that falls inside each row block.

    U: 2-D array whose columns are the modes; `blocks` is a sequence of row
    slices. Returns an array of shape (modes, len(blocks)) whose rows are
    normalised to sum to 1.
    """
    energy = np.empty((U.shape[1], len(blocks)))  # (modes, H)
    for col, rows in enumerate(blocks):
        energy[:, col] = np.sum(U[rows, :]**2, axis=0)
    energy /= energy.sum(axis=1, keepdims=True)
    return energy

def evd_metrics(G):
    """
    Eigen-decomposition summaries of a symmetric matrix G (numpy.linalg.eigh).

    Returns a dict with
      w        : eigenvalues, sorted descending
      U        : matching eigenvectors (columns)
      rho      : w / (1 + w)
      m_eff    : sum of rho
      ipr      : inverse participation ratio of each eigenvector, sum(U**4)
      eff_supp : effective support, 1 / ipr
    """
    evals, evecs = np.linalg.eigh(G)
    # eigh returns ascending eigenvalues; reorder to descending.
    descending = np.argsort(evals)[::-1]
    evals = evals[descending]
    evecs = evecs[:, descending]
    rho = evals / (1.0 + evals)
    ipr = (evecs**4).sum(axis=0)
    return {
        "w": evals,
        "U": evecs,
        "rho": rho,
        "m_eff": rho.sum(),
        "ipr": ipr,
        "eff_supp": 1.0 / ipr,
    }

def m_eff_blocks_from_G(G):
    """
    Split m_eff = sum(rho) of G over the row blocks in BLOCKS.

    Each mode's rho (from evd_metrics) is distributed over the blocks
    according to that mode's block_energy shares. Returns an array with one
    entry per block.
    """
    metrics = evd_metrics(G)
    shares = block_energy(metrics['U'], BLOCKS)        # (modes, H)
    weighted = metrics['rho'][:, None] * shares
    return weighted.sum(axis=0)                        # (H,)

In [9]:
# ===== Assumes the tanh fits (tanh_fit) are already loaded =====
W2_gauss_samps, W2_RHS_samps, W2_DHS_samps, W2_DST_samps = (
    tanh_fit[model_name]['posterior'].stan_variable("W_L")
    for model_name in (
        'Gaussian tanh',
        'Regularized Horseshoe tanh',
        'Dirichlet Horseshoe tanh',
        'Dirichlet Student T tanh',
    )
)

S = 4000

# --- compute m_eff per block for ALL samples
m_eff_blocks_GAUSS, m_eff_blocks_RHS, m_eff_blocks_DHS, m_eff_blocks_DST = (
    np.zeros((S, H)) for _ in range(4)
)
for s in range(S):
    for _table, _G_stack in (
        (m_eff_blocks_GAUSS, G_gauss),
        (m_eff_blocks_RHS, G_RHS),
        (m_eff_blocks_DHS, G_DHS),
        (m_eff_blocks_DST, G_DST),
    ):
        _table[s] = m_eff_blocks_from_G(_G_stack[s])

# --- bring |W2| into the same (S, H) layout
W2_GAUSS_flat, W2_RHS_flat, W2_DHS_flat, W2_DST_flat = (
    np.abs(np.atleast_2d(samples).reshape(S, H))
    for samples in (W2_gauss_samps, W2_RHS_samps, W2_DHS_samps, W2_DST_samps)
)

# --- flatten to 1D for the scatter plot
x_gau, y_gau = m_eff_blocks_GAUSS.ravel(), W2_GAUSS_flat.ravel()
x_rhs, y_rhs = m_eff_blocks_RHS.ravel(), W2_RHS_flat.ravel()
x_dhs, y_dhs = m_eff_blocks_DHS.ravel(), W2_DHS_flat.ravel()
x_dst, y_dst = m_eff_blocks_DST.ravel(), W2_DST_flat.ravel()

In [None]:
# Per-block m_eff against |W_2| for every draw, one colour per model.
plt.figure()
for _xs, _ys, _tag in (
    (x_gau, y_gau, "Gaussian"),
    (x_rhs, y_rhs, "RHS"),
    (x_dhs, y_dhs, "DHS"),
    (x_dst, y_dst, "DST"),
):
    plt.scatter(_xs, _ys, label=_tag, s=8, alpha=0.35)
plt.xlabel(r"$m_{\mathrm{eff}}^{(b)}$")
plt.ylabel(r"$|W_2|$")
plt.legend()
plt.show()

In [37]:
from utils.generate_data import load_abalone_regression_data
# Reload the full, unstandardized Abalone split (same call as earlier in the notebook).
X, X_test, y, y_test = load_abalone_regression_data(standardized=False, frac=1.0)

# Design matrices as plain float64 NumPy arrays.
X = np.asarray(X, dtype=float)
X_test = np.asarray(X_test, dtype=float)

# Targets flattened from a possible (n, 1) layout to shape (n,).
y, y_test = (np.asarray(target, dtype=float).reshape(-1) for target in (y, y_test))

In [None]:
from utils.kappa_matrix import extract_model_draws, compute_shrinkage_for_W_block, shrinkage_eigs_and_df
from utils.sparsity import local_prune_weights

def compute_shrinkage_with_pruning(
    X,
    W_all, b_all, v_all,          # (D,H,p), (D,H), (D,H)
    sigma_all, tau_w_all, tau_v_all,  # (D,), (D,), (D,)
    lambda_all,                   # (D,H,p)
    activation="tanh",
    return_mats=True,             # set False if you only want summaries
    include_b1_in_Sigma: bool = True,
    include_b2_in_Sigma: bool = True,
    sparsity: float = 0.0
):
    """
    Prune the first-layer weights of every posterior draw and compute the
    per-draw shrinkage quantities with ``compute_shrinkage_for_W_block``.

    Parameters
    ----------
    X : design matrix, forwarded unchanged to ``compute_shrinkage_for_W_block``.
    W_all : array of shape (D, H, p); draw ``d`` is masked with
        ``local_prune_weights(W_all[d], sparsity_level=sparsity)[0]`` before use.
    b_all, v_all : per-draw arrays indexed by draw (annotated (D, H) by the
        original author), passed as ``b0`` / ``v0``.
    sigma_all, tau_w_all, tau_v_all : per-draw scalars (cast with ``float``),
        passed as ``noise`` / ``tau_w`` / ``tau_v``.
    lambda_all : per-draw array (annotated (D, H, p)), passed as ``lambda_tilde``.
    activation, include_b1_in_Sigma, include_b2_in_Sigma : forwarded unchanged.
    return_mats : when False, the five matrix stacks are returned as ``None``
        and the G / shrink operators are not computed at all.
    sparsity : forwarded as ``sparsity_level`` to ``local_prune_weights``.

    Returns
    -------
    A 7-tuple ``(R_stack, S_stack, P_stack, G_stack, shrink_stack, r_eigs, df_eff)``:
      R_stack, S_stack, P_stack : (D, N, N) with N = H*p; the R, P, S matrices
          returned by ``compute_shrinkage_for_W_block`` (the original notes
          describe R as (P+S)^{-1}P -- not verifiable from this cell).
      G_stack      : (D, N, N), G = P^{-1/2} S P^{-1/2} using only diag(P).
      shrink_stack : (D, N, N), (I + G)^{-1} G.
      r_eigs       : (D, N), sorted first output of ``shrinkage_eigs_and_df(P, S)``.
      df_eff       : (D,), second output of ``shrinkage_eigs_and_df(P, S)``.
    """
    n_draws, n_hidden, n_inputs = W_all.shape
    N = n_hidden * n_inputs

    R_stack = np.empty((n_draws, N, N)) if return_mats else None
    S_stack = np.empty((n_draws, N, N)) if return_mats else None
    P_stack = np.empty((n_draws, N, N)) if return_mats else None
    G_stack = np.empty((n_draws, N, N)) if return_mats else None
    shrink_stack = np.empty((n_draws, N, N)) if return_mats else None
    r_eigs = np.empty((n_draws, N))
    df_eff = np.empty(n_draws)

    # Loop-invariant identity, built once instead of once per draw.
    eye_N = np.identity(N)

    for d in range(n_draws):
        # local_prune_weights' result is indexed with [0]; presumably the first
        # entry is the keep-mask for W_all[d] -- TODO confirm against utils.sparsity.
        mask = local_prune_weights(W_all[d], sparsity_level=sparsity)
        W_pruned = mask[0] * W_all[d]
        R, P, S, _Sigma_y, _, _ = compute_shrinkage_for_W_block(
            X=X,
            W0=W_pruned,
            b0=b_all[d],
            v0=v_all[d],
            noise=float(sigma_all[d]),
            tau_w=float(tau_w_all[d]),
            tau_v=float(tau_v_all[d]),
            lambda_tilde=lambda_all[d],
            activation=activation,
            include_b1_in_Sigma=include_b1_in_Sigma,
            include_b2_in_Sigma=include_b2_in_Sigma,
        )

        if return_mats:
            # Only diag(P) enters G, i.e. P is treated as diagonal. Scaling the
            # rows and columns of S equals diag(d) @ S @ diag(d) without the
            # two dense N x N products.
            inv_sqrt_diag = 1.0 / np.sqrt(np.diag(P))
            G = inv_sqrt_diag[:, None] * S * inv_sqrt_diag[None, :]

            R_stack[d] = R
            S_stack[d] = S
            P_stack[d] = P
            G_stack[d] = G
            # Solve (I + G) Z = G rather than forming the explicit inverse.
            shrink_stack[d] = np.linalg.solve(eye_N + G, G)

        r, df = shrinkage_eigs_and_df(P, S)
        r_eigs[d] = np.sort(r)
        df_eff[d] = df

    return R_stack, S_stack, P_stack, G_stack, shrink_stack, r_eigs, df_eff


# Options shared by all four shrinkage runs below.
_shrink_opts = dict(
    activation="tanh",
    include_b1_in_Sigma=True,
    include_b2_in_Sigma=True,
)

# --- Gaussian prior (b2 is extracted but not passed on)
W, b1, v, b2, noise, tau_w, tau_v, lambda_eff = extract_model_draws(
    tanh_fit, model='Gaussian tanh'
)
(R_gauss, S_gauss, P_gauss, G_gauss,
 shrink_gauss, eigs_gauss, df_gauss) = compute_shrinkage_with_pruning(
    X, W, b1, v, noise, tau_w, tau_v, lambda_eff, **_shrink_opts
)
print("done with Gauss")

# --- Regularized Horseshoe prior
W, b1, v, b2, noise, tau_w, tau_v, lambda_eff = extract_model_draws(
    tanh_fit, model='Regularized Horseshoe tanh'
)
(R_RHS, S_RHS, P_RHS, G_RHS,
 shrink_RHS, eigs_RHS, df_eff_RHS) = compute_shrinkage_with_pruning(
    X, W, b1, v, noise, tau_w, tau_v, lambda_eff, **_shrink_opts
)
print("done with RHS")

# --- Dirichlet Horseshoe prior
W, b1, v, b2, noise, tau_w, tau_v, lambda_eff = extract_model_draws(
    tanh_fit, model='Dirichlet Horseshoe tanh'
)
(R_DHS, S_DHS, P_DHS, G_DHS,
 shrink_DHS, eigs_DHS, df_eff_DHS) = compute_shrinkage_with_pruning(
    X, W, b1, v, noise, tau_w, tau_v, lambda_eff, **_shrink_opts
)
print("done with DHS")

# --- Dirichlet Student T prior
W, b1, v, b2, noise, tau_w, tau_v, lambda_eff = extract_model_draws(
    tanh_fit, model='Dirichlet Student T tanh'
)
(R_DST, S_DST, P_DST, G_DST,
 shrink_DST, eigs_DST, df_eff_DST) = compute_shrinkage_with_pruning(
    X, W, b1, v, noise, tau_w, tau_v, lambda_eff, **_shrink_opts
)
print("done with DST")

In [None]:
import os
import numpy as np

os.makedirs("Abalone_matrices", exist_ok=True)

def save_PS(model_name, P, S, out_dir="Abalone_matrices"):
    """
    Save a model's P and S stacks as float32 in one compressed ``.npz`` file.

    Parameters
    ----------
    model_name : str; spaces are replaced by underscores to build the file
        name ``<model_name>_PS.npz``.
    P, S : array-likes; converted with ``np.asarray(..., dtype=np.float32)``
        and stored under the keys ``"P"`` and ``"S"``.
    out_dir : target directory, created if missing. Defaults to the
        ``Abalone_matrices`` folder used by this notebook.

    Prints the written path and the stored shapes; returns nothing.
    """
    # Convert first so the shapes reported below are those actually written
    # (and so plain nested lists work, not only ndarrays).
    P32 = np.asarray(P, dtype=np.float32)
    S32 = np.asarray(S, dtype=np.float32)

    os.makedirs(out_dir, exist_ok=True)
    fn = os.path.join(out_dir, f"{model_name.replace(' ', '_')}_PS.npz")
    np.savez_compressed(fn, P=P32, S=S32)
    print(f"Saved {fn}  with P,S shapes={P32.shape},{S32.shape}  dtype=float32")

# One file per model (the original note gives the stacks as shape (4000, 160, 160)).
# NOTE(review): the labels say "sparsity_90", but the visible
# compute_shrinkage_with_pruning calls in this notebook do not pass `sparsity`
# (default 0.0) -- confirm the stacks being saved really match the label.
for _label, _P_stack, _S_stack in (
    ("Gaussian_sparsity_90", P_gauss, S_gauss),
    ("Regularized_Horseshoe_sparsity_90", P_RHS, S_RHS),
    ("Dirichlet_Horseshoe_sparsity_90", P_DHS, S_DHS),
    ("Dirichlet_StudentT_sparsity_90", P_DST, S_DST),
):
    save_PS(_label, _P_stack, _S_stack)


In [34]:
# --- Traces as distributions (df_eff = tr(R) vs total shrinkage = tr(I-R)) ---
import matplotlib.pyplot as plt

# Disabled variant, kept for reference: the same per-draw trace taken from
# shrink_G_* stacks (those names are not defined in the cells shown here).
# tr_R_gauss = np.trace(shrink_G_gauss, axis1=1, axis2=2)
# tr_R_RHS   = np.trace(shrink_G_RHS,   axis1=1, axis2=2)
# tr_R_DHS   = np.trace(shrink_G_DHS,   axis1=1, axis2=2)
# tr_R_DST   = np.trace(shrink_G_DST,   axis1=1, axis2=2)

# Per-draw trace over the last two axes of each shrink_* stack. As produced by
# compute_shrinkage_with_pruning these stacks hold (I+G)^{-1} G with
# G = P^{-1/2} S P^{-1/2}.
tr_SPinvS_gauss = shrink_gauss.trace(axis1=1, axis2=2)
tr_SPinvS_RHS = shrink_RHS.trace(axis1=1, axis2=2)
tr_SPinvS_DHS = shrink_DHS.trace(axis1=1, axis2=2)
tr_SPinvS_DST = shrink_DST.trace(axis1=1, axis2=2)



In [None]:
# Overlaid histograms of the per-draw shrinkage traces, one per model.
plt.figure(figsize=(8,4), dpi=150)
bins = 40
for _trace_values, _label in (
    (tr_SPinvS_gauss, "Gauss"),
    (tr_SPinvS_RHS, "RHS"),
    (tr_SPinvS_DHS, "DHS"),
    (tr_SPinvS_DST, "DST"),
):
    plt.hist(_trace_values, bins=bins, alpha=0.5, label=_label)
plt.xlabel(r"$tr((P+S)^{-1}S)$")
plt.ylabel("Frequency")
plt.legend()
plt.tight_layout()
plt.show()