In [None]:
import sys, os; sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__) if '__file__' in globals() else os.getcwd(), '..')))
#import os; os.chdir(os.path.dirname(os.getcwd()))
from utils.model_loader import get_model_fits
import numpy as np
import pandas as pd
import re
from sklearn.metrics import mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Directory holding the saved fits for the single-hidden-layer tanh networks (Friedman data).
results_dir_priors = "results/priors/single_layer/tanh/friedman"

# Priors to load; the same strings are used further down as keys into the loaded fits.
prior_names = [
    "Dirichlet Horseshoe",
    "Regularized Horseshoe",
    "Dirichlet Student T",
    "Gaussian",
]

# One get_model_fits call per data-set configuration (N = 100, 200, 500); only the
# config string differs between the three calls.
prior_N100_fits, prior_N200_fits, prior_N500_fits = (
    get_model_fits(
        config=config_name,
        results_dir=results_dir_priors,
        models=prior_names,
        include_prior=False,
    )
    for config_name in (
        "Friedman_N100_p10_sigma1.00_seed1",
        "Friedman_N200_p10_sigma1.00_seed2",
        "Friedman_N500_p10_sigma1.00_seed11",
    )
)


In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Choose the hidden node index to inspect (0-based: it indexes the last axis of the weight array)
node_idx = 1  # index 1 selects the second hidden node; use 0 for the first

def extract_W_for_node(cmdstan_mcmc, var_name="W_1", node=0):
    """Slice out the weights feeding one hidden node from a fitted model.

    Parameters
    ----------
    cmdstan_mcmc : object exposing ``stan_variable(name)``
        Fit object from which the weight draws are read.
    var_name : str
        Name of the Stan variable holding the weights.
    node : int
        0-based index along the third axis of the weight array.

    Returns
    -------
    Array indexed as ``[:, :, node]``; with the expected (n_draws, P, H) layout
    this is an (n_draws, P) array.
    """
    all_weights = cmdstan_mcmc.stan_variable(var_name)  # expected layout: (n_draws, P, H)
    weights_into_node = all_weights[:, :, node]
    return weights_into_node

# Per-prior weights feeding the selected hidden node, read from the N=500 fits.
def _posterior_W(prior_name):
    """Weights into node ``node_idx`` from the named prior's posterior (N=500 fits)."""
    return extract_W_for_node(prior_N500_fits[prior_name]['posterior'], node=node_idx)

W_gauss = _posterior_W('Gaussian')
W_reg_hs = _posterior_W('Regularized Horseshoe')
# NOTE(review): the two Dirichlet-based priors are multiplied by sqrt(10) — presumably
# sqrt(p) with p = 10 inputs, to put them on the same scale as the others; confirm.
W_dir_hs = np.sqrt(10) * _posterior_W('Dirichlet Horseshoe')
W_dir_st = np.sqrt(10) * _posterior_W('Dirichlet Student T')
#W_dir_gam = extract_W_for_node(prior_N500_fits['Dirichlet Gamma']['Dirichlet Gamma']['posterior'], node=node_idx)

# Short display label -> (n_draws, P) weight array; every analysis below iterates over this dict.
models = {
    "Gauss": W_gauss,
    "RHS": W_reg_hs,
    "DHS": W_dir_hs,
    "DS-T": W_dir_st,
    #"DG": W_dir_gam
}


In [None]:
def rank_profile(W):
    """Summarise weight magnitudes by their within-draw rank.

    Each row of ``W`` (one draw) is turned into absolute values and ordered from
    largest to smallest, so column ``r`` holds the (r+1)-th largest magnitude of
    every draw.

    Returns
    -------
    (mean, q05, q95) : three arrays of length ``W.shape[1]``
        Mean, 5% quantile and 95% quantile over draws at each rank.
    """
    magnitudes_desc = np.flip(np.sort(np.abs(W), axis=1), axis=1)   # (n_draws, P), descending per row
    lower, upper = np.quantile(magnitudes_desc, [0.05, 0.95], axis=0)
    return magnitudes_desc.mean(axis=0), lower, upper

# Mean |w| at each rank (1 = largest magnitude within a draw), one line per model.
fig, ax = plt.subplots(figsize=(7, 4))
for name, W in models.items():
    # Only the mean profile is drawn; the 5%/95% quantiles returned alongside are not plotted.
    mean_by_rank = rank_profile(W)[0]
    rank_positions = np.arange(1, W.shape[1] + 1)
    ax.plot(rank_positions, mean_by_rank, marker='o', label=name)
ax.set_xlabel("Rank")
ax.set_ylabel("Average |w|")
ax.set_title("Average magnitude by rank")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
def topk_curve(W):
    """Mean cumulative share of squared weight captured by the k largest entries.

    Squared weights are normalised within each draw (row) to sum to one, ordered
    from largest to smallest, averaged over draws and cumulatively summed. Because
    averaging and cumulative summation commute, entry ``k-1`` is the average over
    draws of the share held by the top ``k`` weights.

    Returns an array of length ``W.shape[1]``.
    """
    squared = W**2
    row_totals = squared.sum(axis=1, keepdims=True)          # one total per draw
    shares_desc = np.flip(np.sort(squared / row_totals, axis=1), axis=1)
    return np.cumsum(shares_desc.mean(axis=0))

# Cumulative share of squared weight held by the top-k inputs, one line per model.
fig, ax = plt.subplots(figsize=(7, 4))
for name, W in models.items():
    cumulative_share = topk_curve(W)
    top_k = np.arange(1, len(cumulative_share) + 1)
    ax.plot(top_k, cumulative_share, marker='.', label=name)
#ax.axhline(0.9, ls='--', lw=1, label='90%')
#ax.axhline(0.95, ls='--', lw=1, label='95%')
ax.set_xlabel("Rank")
ax.set_ylabel("Expected share")
ax.set_title("Cumulative share of squared weights")
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
def winner_freq(W):
    """Fraction of draws in which each column has the largest absolute weight.

    Ties within a draw go to the lowest column index (``argmax`` semantics).
    Returns an array of length ``W.shape[1]`` that sums to one.
    """
    n_inputs = W.shape[1]
    winning_column = np.abs(W).argmax(axis=1)          # one column index per draw
    wins = np.bincount(winning_column, minlength=n_inputs)
    return wins / wins.sum()

# Winner frequencies as a table: one row per input column, one column per model.
freq_df = pd.DataFrame(
    {name: winner_freq(W) for name, W in models.items()}
).rename(index=lambda position: f"input_{position}")
print(freq_df.round(3))


In [None]:
def gini(v):
    """Gini coefficient of a 1-D array of non-negative values.

    Returns 0 when all entries are equal. For a one-hot vector of length ``n``
    the value is ``(n - 1) / n``, so it approaches (but does not reach) 1.
    The input is not modified; a sorted copy is used internally.
    """
    ascending = np.sort(v)
    count = ascending.size
    # Weight for the k-th smallest value (k = 1..count) is 2k - count - 1.
    rank_weights = 2 * np.arange(1, count + 1) - count - 1
    return np.sum(rank_weights * ascending) / (count * ascending.sum())

def gini_over_draws(W):
    """Gini coefficient of the normalised squared weights, computed per draw.

    Each row of ``W`` is squared and divided by its row sum, then passed to
    ``gini``. Returns one value per row.
    """
    squared = W**2
    per_draw_shares = squared / squared.sum(axis=1, keepdims=True)
    return np.apply_along_axis(gini, axis=1, arr=per_draw_shares)

# Report the mean (over draws) Gini coefficient of the squared-weight shares per model.
for name, W in models.items():
    print(f"{name}: mean Gini = {gini_over_draws(W).mean():.3f}   (0=uniform, 1=one-hot)")


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Small constant added to denominators and log arguments to avoid division by zero / log(0).
EPS = 1e-12

def shares_from_W(W):
    """Squared weights normalised within each draw (row).

    Every row of the result sums to (just under) one; an all-zero row maps to
    all zeros rather than NaN because ``EPS`` is added to the row total.
    """
    squared = W**2
    row_totals = squared.sum(axis=1, keepdims=True)
    return squared / (row_totals + EPS)

# --- Shannon entropy and friends ---
def shannon_entropy_over_draws(W):
    """
    Shannon entropy (natural log) of the squared-weight shares, one value per draw.

    Approximately 0 for a one-hot row and ln(P) for a uniform row, where P is the
    number of columns; ``EPS`` inside the log keeps zero shares finite.
    """
    shares = shares_from_W(W)
    log_shares = np.log(shares + EPS)
    return -np.sum(shares * log_shares, axis=1)

def norm_entropy_over_draws(W):
    """
    Entropy-based sparsity score per draw: 1 - H / ln(P).

    0 corresponds to uniform shares, 1 to a one-hot row.
    """
    n_inputs = W.shape[1]
    max_entropy = np.log(n_inputs) + EPS       # EPS guards the P == 1 case (ln 1 = 0)
    entropy = shannon_entropy_over_draws(W)
    return 1.0 - entropy / max_entropy

def perplexity_over_draws(W):
    """
    Perplexity exp(H) per draw: an effective number of active inputs,
    ranging from 1 (one-hot) to P (uniform).
    """
    return np.exp(shannon_entropy_over_draws(W))


# --- Convenience: summarize per model ---
def summarize_entropy_kl(models):
    """Tabulate entropy-based sparsity summaries for each model.

    Parameters
    ----------
    models : dict
        Maps a model label to its (n_draws, P) weight array.

    Returns
    -------
    pandas.DataFrame indexed by ``model`` with the mean, 5% and 95% quantiles
    (over draws) of the entropy in nats, the normalised score 1 - H/ln(P), and
    the perplexity exp(H). Despite the function name, no KL divergence is computed.
    """
    def _mean_p05_p95(values):
        return np.mean(values), np.quantile(values, 0.05), np.quantile(values, 0.95)

    records = []
    for label, weights in models.items():
        entropy = shannon_entropy_over_draws(weights)
        # (column name of the mean, prefix of the quantile columns, per-draw values)
        metrics = (
            ("H (nats) mean", "H", entropy),
            ("H_norm mean", "H_norm", 1 - entropy / (np.log(weights.shape[1]) + EPS)),
            ("Perplexity mean", "Perplexity", np.exp(entropy)),
        )
        record = {"model": label}
        for mean_column, prefix, values in metrics:
            mean, p05, p95 = _mean_p05_p95(values)
            record[mean_column] = mean
            record[f"{prefix} p05"] = p05
            record[f"{prefix} p95"] = p95
        records.append(record)
    return pd.DataFrame(records).set_index("model")

# Print a compact per-model summary: mean normalized-entropy score and mean perplexity.
summary_df = summarize_entropy_kl(models)
print(summary_df[[
    "H_norm mean", "Perplexity mean"
]].round(3))

# Bar chart of the mean normalized-entropy sparsity score (1 - H/ln P) per model.
plt.figure(figsize=(6.5, 3.8))
x = np.arange(len(models))
bar_w = 0.35
Hn_means = [norm_entropy_over_draws(W).mean() for W in models.values()]
# A single series is drawn, so each bar is centred on its tick (an offset of
# -bar_w/2 is only appropriate when a second series sits beside it).
plt.bar(x, Hn_means, width=bar_w, label="1 - H/ln P")
plt.xticks(x, list(models.keys()), rotation=15)
plt.ylim(0, 1)
plt.ylabel("Normalized (0=uniform, 1=one-hot)")
# The title names only what is plotted: no KL quantity is computed in this notebook section.
plt.title(f"Normalized-entropy sparsity — node {node_idx}")
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import rankdata

# generic: rank→uniform columnwise (works for any matrix A)
def empirical_copula_cols(A):
    """Map every column of a 2-D array to pseudo-uniform scores via its ranks.

    Within each column the values are ranked (ties receive their average rank)
    and divided by ``n + 1``, where ``n`` is the number of rows, so all scores
    lie strictly between 0 and 1. Returns a float array shaped like ``A``.
    """
    n_rows, _ = A.shape                      # also enforces that A is 2-D
    column_ranks = rankdata(A, method="average", axis=0)
    return column_ranks / (n_rows + 1.0)

def normalize_rows(W, mode="none", eps=1e-12):
    """Rescale each row of ``W`` according to ``mode``.

    mode="none" : return ``W`` itself, untouched.
    mode="l2"   : divide each row by its Euclidean norm.
    mode="mad"  : robust alternative — subtract the row median and divide by the
                  row's median absolute deviation.

    Scale factors smaller than ``eps`` are replaced by ``eps`` to avoid division
    by zero. Raises ValueError for any other mode.
    """
    if mode == "none":
        return W
    if mode == "l2":
        row_norm = np.linalg.norm(W, axis=1, keepdims=True)
        return W / np.maximum(row_norm, eps)
    if mode == "mad":
        row_median = np.median(W, axis=1, keepdims=True)
        centred = W - row_median
        row_mad = np.median(np.abs(centred), axis=1, keepdims=True)
        return centred / np.maximum(row_mad, eps)
    raise ValueError("mode must be 'none', 'l2', or 'mad'")

def tail_dependence_curve_from_matrix(A, i=0, j=1, u_grid=None):
    """Empirical upper-tail dependence curve between columns ``i`` and ``j`` of ``A``.

    Columns are first mapped to rank-based pseudo-uniform scores. For each
    threshold ``u`` the curve value is the fraction of rows with ``U_j > u``
    among the rows with ``U_i > u``; it is NaN when no row has ``U_i > u``.

    Returns ``(u_grid, lam)``. ``u_grid`` defaults to 25 points in [0.80, 0.99].
    """
    thresholds = np.linspace(0.80, 0.99, 25) if u_grid is None else u_grid
    scores = empirical_copula_cols(A)
    score_i, score_j = scores[:, i], scores[:, j]

    lam_values = []
    for u in thresholds:
        i_in_tail = score_i > u
        if np.any(i_in_tail):
            lam_values.append(np.mean(score_j[i_in_tail] > u))
        else:
            lam_values.append(np.nan)
    return thresholds, np.array(lam_values)

def plot_tail_dependence(models, i=0, j=1, u_grid=None):
    """Plot the empirical upper-tail dependence of |w| between two input columns.

    One panel is drawn per model, showing lambda_U(u) = P(U_j > u | U_i > u)
    computed on the absolute weights, together with the dashed reference line
    1 - u (the value expected if the two columns were independent).

    Parameters
    ----------
    models : dict
        Maps a model label to its (n_draws, P) weight array.
    i, j : int
        Column indices of the conditioning input and the compared input.
    u_grid : array-like, optional
        Thresholds in (0, 1); defaults to 25 points in [0.80, 0.99].

    Panels are laid out in two columns. The grid is 2x2 for up to four models
    and gains rows beyond that, so no model is silently dropped.
    """
    if u_grid is None:
        u_grid = np.linspace(0.80, 0.99, 25)
    u_grid = np.asarray(u_grid, dtype=float)   # lets callers pass a plain list

    curves = {}
    ymax = 0.0
    for name, W in models.items():
        #Wn = normalize_rows(W, mode="l2")
        A = np.abs(W)  # use |w|
        u, lam = tail_dependence_curve_from_matrix(A, i=i, j=j, u_grid=u_grid)
        curves[name] = (u, lam)
        # Ignore NaN entries explicitly so an all-NaN curve neither warns nor affects the limit.
        finite_lam = lam[np.isfinite(lam)]
        if finite_lam.size:
            ymax = max(ymax, finite_lam.max())
    baseline = 1.0 - u_grid
    ymax = max(ymax, baseline.max())

    n_cols = 2
    n_rows = max(2, -(-len(curves) // n_cols))   # ceil division, never fewer than 2 rows
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(5, 2.5 * n_rows), sharex=True, sharey=True)
    first_bottom_panel = (n_rows - 1) * n_cols

    # A dedicated panel counter is used so the column index `i` is not overwritten.
    for panel, (ax, (name, (u, lam))) in enumerate(zip(axes.ravel(), curves.items())):
        ax.plot(u, lam, marker='o', ms=3, lw=1)
        ax.plot(u_grid, baseline, linestyle='--', lw=1.2, label='(1 - u)')
        ax.set_title(name)
        ax.set_ylim(0, min(1.0, ymax * 1.05))
        ax.set_xlim(0.8, 1)
        ax.set_xticks([0.8, 0.9, 1.0])
        ax.grid(True, linewidth=0.4, alpha=0.4)
        if panel >= first_bottom_panel:        # x label on the bottom row only
            ax.set_xlabel("u")
        if panel % n_cols == 0:                # y label on the left column only
            ax.set_ylabel(r"$\lambda_U(u) = P(U_j>u \mid U_i>u)$")
        ax.legend(loc='upper right', fontsize=8)

    #fig.suptitle(f"Upper-tail dependence — |w|, inputs {i} vs {j}", y=0.98)
    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.show()

# Example: upper-tail dependence of |w| between input columns 0 and 1, one panel per model.
plot_tail_dependence(models, i=0, j=1)


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from numpy.random import default_rng
# assumes empirical_copula_cols() is already defined above
# assumes shares_from_W(W) is already defined in your session

def _gaussian_shares_baseline_lambdaU(P, i=0, j=1, u_grid=None, N=200_000, seed=0):
    """Monte-Carlo reference curve for the tail dependence of squared-weight shares.

    Draws ``N`` share vectors of length ``P`` from a symmetric Dirichlet(1/2)
    — the distribution of normalised squares of i.i.d. zero-mean Gaussian
    weights — and computes the same empirical curve P(U_j > u | U_i > u) on
    their rank-based scores.

    Returns ``(u_grid, lam_base)``; entries are NaN where no sample has U_i > u.
    The result is reproducible for a given ``seed``.
    """
    rng = default_rng(seed)
    thresholds = np.linspace(0.80, 0.99, 25) if u_grid is None else u_grid

    concentration = np.full(P, 0.5, dtype=float)
    baseline_shares = rng.dirichlet(concentration, size=N)    # Gaussian-shares baseline
    scores = empirical_copula_cols(baseline_shares)
    score_i, score_j = scores[:, i], scores[:, j]

    lam_values = []
    for u in thresholds:
        i_in_tail = score_i > u
        if np.any(i_in_tail):
            lam_values.append(np.mean(score_j[i_in_tail] > u))
        else:
            lam_values.append(np.nan)
    return thresholds, np.array(lam_values)

def tail_dependence_on_shares_gauss_baseline(models, i=0, j=1, u_grid=None, N=200_000, seed=0):
    """Plot upper-tail dependence of squared-weight shares against a Gaussian baseline.

    For every model the per-draw shares w^2 / sum_k w_k^2 are computed and the
    empirical curve lambda_U(u) = P(U_j > u | U_i > u) between columns ``i`` and
    ``j`` is drawn in its own panel. The dashed line is the same curve for
    simulated Dirichlet(1/2) shares (what i.i.d. Gaussian weights would give).

    Parameters
    ----------
    models : dict
        Maps a model label to its (n_draws, P) weight array; must be non-empty,
        since P is read from the first entry.
    i, j : int
        Column indices of the conditioning input and the compared input.
    u_grid : array-like, optional
        Thresholds in (0, 1); defaults to 25 points in [0.80, 0.99].
    N, seed : int
        Number of baseline samples and the seed used to draw them.

    Panels are laid out in two columns. The grid is 2x2 for up to four models
    and gains rows beyond that, so no model is silently dropped.
    """
    if u_grid is None:
        u_grid = np.linspace(0.80, 0.99, 25)

    def _raise_to_finite_max(current, values):
        # NaN entries (empty tails) are ignored so they cannot poison the axis limit.
        finite = values[np.isfinite(values)]
        return max(current, finite.max()) if finite.size else current

    P = next(iter(models.values())).shape[1]
    u_base, lam_base = _gaussian_shares_baseline_lambdaU(P, i=i, j=j, u_grid=u_grid, N=N, seed=seed)

    curves = {}
    ymax = _raise_to_finite_max(0.0, lam_base)
    for name, W in models.items():
        p = shares_from_W(W)
        # Reuse the shared curve helper instead of repeating the rank/threshold logic here.
        _, lam = tail_dependence_curve_from_matrix(p, i=i, j=j, u_grid=u_grid)
        curves[name] = lam
        ymax = _raise_to_finite_max(ymax, lam)

    n_cols = 2
    n_rows = max(2, -(-len(curves) // n_cols))   # ceil division, never fewer than 2 rows
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(5, 2.5 * n_rows), sharex=True, sharey=True)
    first_bottom_panel = (n_rows - 1) * n_cols

    # A dedicated panel counter is used so the column index `i` is not overwritten.
    for panel, (ax, (name, lam)) in enumerate(zip(axes.ravel(), curves.items())):
        ax.plot(u_grid, lam, marker='o', ms=3, lw=1)
        ax.plot(u_base, lam_base, ls='--', lw=1.2, label='Dir(½)')
        ax.set_title(name)
        ax.set_ylim(0, min(1.0, ymax * 1.05))
        ax.set_xlim(0.8, 1)
        ax.grid(True, linewidth=0.4, alpha=0.4)
        if panel >= first_bottom_panel:        # x label on the bottom row only
            ax.set_xlabel("u")
        if panel % n_cols == 0:                # y label on the left column only
            ax.set_ylabel(r"$\lambda_U(u) = P(U_j>u \mid U_i>u)$")
        ax.legend(fontsize=8, loc='upper right')

    #fig.suptitle(r"Upper-tail dependence of $\frac{w^2}{\sum_k w_k^2}$ — (baseline = Gaussian ⇒ Dir(½))", y=0.98)
    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.show()

# Example: columns 0 and 1 of the squared-weight shares, against a Dirichlet(1/2) baseline
# simulated from 200,000 samples with seed 42.
tail_dependence_on_shares_gauss_baseline(models, i=0, j=1, N=200_000, seed=42)


## TESTING STUFF

In [None]:
# NOTE: this cell rebinds results_dir_priors, prior_names and prior_N100_fits, which the
# first loading cell of the notebook also defines. After it runs, prior_N100_fits holds
# the Regularized Horseshoe fit only.
results_dir_priors = "results/tweak_tau"

prior_names = ["Regularized Horseshoe"]

# One Regularized Horseshoe fit per p0 configuration stored under results/tweak_tau.
p0_1_fit, p0_2_fit, p0_4_fit, p0_8_fit, p0_9_fit = (
    get_model_fits(
        config=config_name,
        results_dir=results_dir_priors,
        models=prior_names,
        include_prior=False,
    )
    for config_name in ("p0_1", "p0_2", "p0_4", "p0_8", "p0_9")
)

# Reference fit from the main prior-comparison results (N=100 Friedman configuration).
results_dir_priors_other = "results/priors/single_layer/tanh/friedman"

prior_N100_fits = get_model_fits(
    config="Friedman_N100_p10_sigma1.00_seed1",
    results_dir=results_dir_priors_other,
    models=prior_names,
    include_prior=False,
)

In [10]:
# Training split of the N=100 Friedman data set (same configuration string as prior_N100_fits).
path = "datasets/friedman/Friedman_N100_p10_sigma1.00_seed1.npz"
data = np.load(path)   # kept bound (and open) so other arrays in the archive stay readable
X, y = data['X_train'], data['y_train']

In [None]:
from utils.kappa_matrix import extract_model_draws, compute_shrinkage

# Run the same extract -> compute_shrinkage pipeline for every fit, in the original order.
# Each fit is processed identically, so the six copy-pasted stanzas are expressed as one loop.
_fits_by_label = {
    "p0_1": p0_1_fit,
    "p0_2": p0_2_fit,
    "p0_4": p0_4_fit,
    "p0_8": p0_8_fit,
    "p0_9": p0_9_fit,
    "other": prior_N100_fits,
}

_shrinkage_by_label = {}
for _label, _fit in _fits_by_label.items():
    # The draws are deliberately bound at module level (as the original cell did), so after
    # the loop W, b1, v, b2, noise, tau_w, tau_v and lambda_eff hold the last ("other") fit.
    W, b1, v, b2, noise, tau_w, tau_v, lambda_eff = extract_model_draws(
        _fit, model='Regularized Horseshoe'
    )
    # b2 is extracted but not passed on; compute_shrinkage takes X and the remaining draws.
    _shrinkage_by_label[_label] = compute_shrinkage(
        X, W, b1, v, noise, tau_w, tau_v, lambda_eff,
        activation="tanh",
        include_b1_in_Sigma=True,
        include_b2_in_Sigma=True,
    )
    print(f"done with {_label}")

# Unpack each 7-tuple into the per-configuration names used by the cells below.
R_p01, S_p01, P_p01, G_p01, shrink_p01, eigs_p01, df_eff_p01 = _shrinkage_by_label["p0_1"]
R_p02, S_p02, P_p02, G_p02, shrink_p02, eigs_p02, df_eff_p02 = _shrinkage_by_label["p0_2"]
R_p04, S_p04, P_p04, G_p04, shrink_p04, eigs_p04, df_eff_p04 = _shrinkage_by_label["p0_4"]
R_p08, S_p08, P_p08, G_p08, shrink_p08, eigs_p08, df_eff_p08 = _shrinkage_by_label["p0_8"]
R_p09, S_p09, P_p09, G_p09, shrink_p09, eigs_p09, df_eff_p09 = _shrinkage_by_label["p0_9"]
R_other, S_other, P_other, G_other, shrink_other, eigs_other, df_eff_other = _shrinkage_by_label["other"]


In [13]:
def _identity_minus(R):
    """Return I - R, with the identity sized from R's last axis.

    The 2-D identity broadcasts over any leading axes of ``R``, so a stack of
    square matrices yields a stack of the same shape. Taking the size from
    ``R`` avoids hard-coding the matrix dimension.
    """
    return np.eye(R.shape[-1]) - R

SP_inv_S_p01 = _identity_minus(R_p01)
SP_inv_S_p02 = _identity_minus(R_p02)
SP_inv_S_p04 = _identity_minus(R_p04)
SP_inv_S_p08 = _identity_minus(R_p08)
SP_inv_S_p09 = _identity_minus(R_p09)

SP_inv_S_other = _identity_minus(R_other)

In [14]:
# "Total shrinkage": the trace of every matrix in each SP_inv_S_* stack (I - R),
# taken over axes 1 and 2 so that one value is produced per entry along axis 0.
(
    tr_SPinvS_p01,
    tr_SPinvS_p02,
    tr_SPinvS_p04,
    tr_SPinvS_p08,
    tr_SPinvS_p09,
    tr_SPinvS_other,
) = (
    np.trace(stack, axis1=1, axis2=2)
    for stack in (
        SP_inv_S_p01,
        SP_inv_S_p02,
        SP_inv_S_p04,
        SP_inv_S_p08,
        SP_inv_S_p09,
        SP_inv_S_other,
    )
)



In [None]:
import matplotlib.pyplot as plt

# Overlaid histograms of the per-draw traces, one per configuration.
plt.figure(figsize=(8,4), dpi=150)
bins = 40
for trace_values, legend_label in (
    (tr_SPinvS_p01, "p0=1"),
    (tr_SPinvS_p02, "p0=2"),
    (tr_SPinvS_p04, "p0=4"),
    (tr_SPinvS_p08, "p0=8"),
    (tr_SPinvS_p09, "p0=9"),
    (tr_SPinvS_other, "other"),
):
    plt.hist(trace_values, bins=bins, alpha=0.5, label=legend_label)
plt.xlabel(r"$tr((P+S)^{-1}S)$")
plt.ylabel("Frequency")
plt.legend()
plt.tight_layout()
plt.show()