In [5]:
import numpy as np
import os
import sys, os; sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__) if '__file__' in globals() else os.getcwd(), '..')))
from utils.model_loader import get_model_fits
import numpy as np
import pandas as pd
import re
from sklearn.metrics import mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt


# Fix the global NumPy seed so every draw below is reproducible.
np.random.seed(123)

N = 250   # number of simulated observations
P = 10    # number of covariates

# Sparse ground truth: only coefficients 0, 1, 4 and 6 are non-zero.
beta_true = np.array([3.0, -2.0, 0.0, 0.0, 1.5, 0.0, 0.8, 0.0, 0.0, 0.0])

# Correlated predictors: Sigma[i, j] = rho ** |i - j|, applied through its
# Cholesky factor to i.i.d. standard normal draws.
rho = 0.7
_cov_index = np.arange(P)
Sigma = rho ** np.abs(np.subtract.outer(_cov_index, _cov_index))
L = np.linalg.cholesky(Sigma)
X = np.random.normal(size=(N, P)) @ L.T

# Perturb a random 5% of the rows with wide Gaussian noise (sd 8) to create
# covariate outliers.
outlier_fraction = 0.05
num_outliers = int(outlier_fraction * N)
outlier_rows = np.random.choice(N, num_outliers, replace=False)
X[outlier_rows] += np.random.normal(0, 8.0, size=(num_outliers, P))

# Heavy-tailed response noise: Student-t draws with df = 3, multiplied by
# sigma_true.
df = 3
sigma_true = 0.7
noise = sigma_true * np.random.standard_t(df, size=N)

# Linear response.
y = X @ beta_true + noise


import numpy as np
from utils.generate_data import load_linreg_dataset

# Load the train/test split and its metadata through the project helper; the
# later cells of this notebook work with these arrays.
X_train, X_test, y_train, y_test, meta = load_linreg_dataset()


In [None]:
# Locations of the stored results and the labels of the models to load.
data_dir = "datasets/linreg"
results_dir_linreg = "results/regression/linreg/alpha_learned"
results_dir_linreg_prior = "results/regression/linreg/prior"
model_names = ["Linreg Gaussian", "Linreg Regularized Horseshoe", "Linreg Dirichlet Horseshoe", "Linreg Dirichlet Student T"]


# Fits for the chosen configuration, keyed by model label.
full_config_path = "linreg_N200_p10"
linreg_fit = get_model_fits(
    config=full_config_path,
    results_dir=results_dir_linreg,
    models=model_names,
    include_prior=False,
)

# NOTE(review): the cells under "## Priors" need `prior_linreg_fit`. Enable
# this call (or load the fits there) before running them on a fresh kernel.
# prior_linreg_fit = get_model_fits(
#     config=full_config_path,
#     results_dir=results_dir_linreg_prior,
#     models=model_names,
#     include_prior=False,
# )


In [3]:
# Posterior draws of the regression coefficients, one array per fitted model.
beta_gauss, beta_RHS, beta_DHS, beta_DST = (
    linreg_fit[fit_name]['posterior'].stan_variable("beta")
    for fit_name in (
        'Linreg Gaussian',
        'Linreg Regularized Horseshoe',
        'Linreg Dirichlet Horseshoe',
        'Linreg Dirichlet Student T',
    )
)

# Least-squares reference estimate on the training data, computed through the
# pseudo-inverse of the Gram matrix X'X.
_gram = X_train.T @ X_train
beta_GLS = np.linalg.pinv(_gram) @ X_train.T @ y_train

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde

# Flatten the *test* targets to a 1-D array so they broadcast correctly against
# the per-draw predictions computed below.
y_vec = np.asarray(y_test).reshape(-1)
# Rebinds N (250 in the first cell) to the number of training rows.
N = X_train.shape[0]

def rmse_per_sample(beta_samples, X, y):
    """
    Root-mean-squared error of the linear predictor X @ beta for every draw.

    beta_samples: (S, P) coefficient draws; a single (P,) vector is treated
        as one draw
    X: (N, P) design matrix
    y: N targets; any array-like is flattened to shape (N,), so a column
        vector of shape (N, 1) is accepted as well
    Returns: array (S,) of RMSEs, one per posterior draw
    """
    # Normalise the inputs first: without this a (N, 1) target or a 1-D draw
    # silently broadcasts to the wrong shape instead of raising.
    draws = np.atleast_2d(np.asarray(beta_samples))
    targets = np.asarray(y).reshape(-1)

    # Predictions for all draws at once: (N, S)
    preds = np.asarray(X) @ draws.T
    # Broadcast the targets along the draw axis.
    errors = preds - targets[:, None]
    return np.sqrt(np.mean(errors**2, axis=0))

# Per-draw test RMSE for each model.
rmse_gauss_samps = rmse_per_sample(beta_gauss, X_test, y_vec)
rmse_RHS_samps   = rmse_per_sample(beta_RHS,   X_test, y_vec)
rmse_DHS_samps   = rmse_per_sample(beta_DHS,   X_test, y_vec)
rmse_DST_samps   = rmse_per_sample(beta_DST,   X_test, y_vec)


def _point_rmse(beta_hat):
    """Test-set RMSE of a single coefficient vector.

    Uses the flattened targets `y_vec` and flattens the predictions, so neither
    a (N, 1) target array nor a (P, 1) coefficient array can trigger an
    accidental (N, N) broadcast.
    """
    residuals = y_vec - np.ravel(X_test @ beta_hat)
    return np.sqrt(np.mean(residuals**2))


# RMSE of the posterior-mean coefficients, and of the least-squares estimate.
posterior_mean_rmse_gauss = _point_rmse(np.mean(beta_gauss, axis=0))
posterior_mean_rmse_RHS = _point_rmse(np.mean(beta_RHS, axis=0))
posterior_mean_rmse_DHS = _point_rmse(np.mean(beta_DHS, axis=0))
posterior_mean_rmse_DST = _point_rmse(np.mean(beta_DST, axis=0))
rmse_GLS = _point_rmse(beta_GLS)

# -------- Density plot of RMSEs --------
plt.figure(figsize=(8, 5))

def plot_rmse_kde(rmse_samples, label, posterior_mean_rmse, color):
    """
    Draw the kernel density estimate of one model's per-draw RMSEs on the
    current pyplot axes.

    The curve is evaluated on 200 points between the 1st and 99th percentile
    of rmse_samples. Two vertical lines in the same color are added: dashed at
    posterior_mean_rmse and solid at the mean of rmse_samples.
    """
    lo, hi = np.percentile(rmse_samples, [1, 99])
    grid = np.linspace(lo, hi, 200)
    density = gaussian_kde(rmse_samples)
    plt.plot(grid, density(grid), label=label, color=color)
    for position, style in ((posterior_mean_rmse, "--"), (np.mean(rmse_samples), "-")):
        plt.axvline(x=position, color=color, linestyle=style)

# One density curve per model (dashed line: RMSE of the posterior mean,
# solid line: mean of the per-draw RMSEs).
plot_rmse_kde(rmse_gauss_samps, "Gaussian", posterior_mean_rmse_gauss, "C0")
plot_rmse_kde(rmse_RHS_samps,   "RHS", posterior_mean_rmse_RHS, "C1")
plot_rmse_kde(rmse_DHS_samps,   "DHS", posterior_mean_rmse_DHS, "C2")
plot_rmse_kde(rmse_DST_samps,   "DST", posterior_mean_rmse_DST, "C3")

# Draw the least-squares reference in black; the default line color would
# coincide with the Gaussian model's "C0".
plt.axvline(x=rmse_GLS, label="GLS", color="black")
# All RMSEs above are evaluated on X_test / y_test.
plt.xlabel("RMSE on test data")
plt.ylabel("Posterior density")
plt.title("Posterior distribution of RMSE per model")
plt.legend()
plt.tight_layout()
plt.show()


In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Rebinds P (10 in the first cell) from the shape of the posterior draws.
S, P = beta_gauss.shape  # number of draws, number of coefficients

# Reshape the draws of one model into long ("tidy") format
def beta_to_long_df(beta_array, model_name):
    """
    Turn a (S, P) array of coefficient draws into a long DataFrame.

    beta_array: (S, P) array, one row per draw and one column per coefficient
    model_name: label stored in the "model" column of every row
    returns DataFrame with S * P rows and the columns beta, draw, coeff, model
    (in that order); rows are ordered draw by draw.
    """
    n_draws, n_coeffs = beta_array.shape
    flat = beta_array.reshape(n_draws * n_coeffs)
    return pd.DataFrame({"beta": flat}).assign(
        draw=np.repeat(np.arange(n_draws), n_coeffs),
        coeff=np.tile(np.arange(n_coeffs), n_draws),
        model=model_name,
    )

# Long-format frame per model, then one stacked frame for plotting.
_long_frames = [
    beta_to_long_df(draws, display_name)
    for draws, display_name in (
        (beta_gauss, "Gaussian"),
        (beta_RHS, "Regularized Horseshoe"),
        (beta_DHS, "Dirichlet Horseshoe"),
        (beta_DST, "Dirichlet Student-t"),
    )
]
df_gauss, df_RHS, df_DHS, df_DST = _long_frames

beta_df = pd.concat(_long_frames, ignore_index=True)

# If a ground-truth vector is defined in this session, attach it per
# coefficient; otherwise later cells skip the reference lines.
beta_true_series = None
if "beta_true" in globals():
    beta_true_series = pd.Series(beta_true, index=np.arange(len(beta_true)))
    beta_df["beta_true"] = beta_df["coeff"].map(beta_true_series)


In [None]:
# One panel per coefficient, one box per model.
fig, axes = plt.subplots(5, int(np.ceil(P / 5)), figsize=(16, 16), sharey=False)
axes = axes.flatten()

_box_models = ["Gaussian", "Regularized Horseshoe", "Dirichlet Horseshoe", "Dirichlet Student-t"]

for j, ax in zip(range(P), axes):
    is_coeff_j = beta_df["coeff"] == j
    # Posterior draws of beta_j under each model, in the order of _box_models.
    data = [
        beta_df.loc[is_coeff_j & (beta_df["model"] == m), "beta"].values
        for m in _box_models
    ]
    ax.boxplot(data, showfliers=False)
    ax.set_xticks([1, 2, 3, 4])
    ax.set_xticklabels(["Gauss", "RHS", "DHS", "DST"], rotation=30)
    ax.set_title(f"β_{j+1}")

    # Reference line at the true coefficient when it is available.
    if beta_true_series is not None:
        ax.axhline(beta_true_series[j], linestyle="--", linewidth=1)

# The grid has 5 * ceil(P / 5) panels; switch off the ones beyond P.
for spare_ax in axes[P:]:
    spare_ax.axis("off")

fig.suptitle("Posterior distributions of β_j by prior (boxplots)", fontsize=14)
fig.tight_layout()
plt.show()


In [None]:
from scipy.stats import gaussian_kde

def plot_beta_kde_for_coeff(j, ax):
    """
    Overlay the marginal posterior density of coefficient j for every model.

    j: coefficient index (matched against the "coeff" column of beta_df)
    ax: matplotlib axis to draw on

    Each density is a Gaussian KDE evaluated on 200 points between the 1st and
    99th percentile of that model's draws. A dashed vertical line marks the
    true value when beta_true_series is available.
    """
    rows_j = beta_df[beta_df["coeff"] == j]

    legend_names = {
        "Gaussian": "Gauss",
        "Regularized Horseshoe": "RHS",
        "Dirichlet Horseshoe": "DHS",
        "Dirichlet Student-t": "DST",
    }
    for model_name, label in legend_names.items():
        samples = rows_j.loc[rows_j["model"] == model_name, "beta"].values
        lo, hi = np.percentile(samples, [1, 99])
        grid = np.linspace(lo, hi, 200)
        ax.plot(grid, gaussian_kde(samples)(grid), label=label, alpha=0.8)

    if beta_true_series is not None:
        ax.axvline(beta_true_series[j], linestyle="--", linewidth=1)

    ax.set_title(f"β_{j+1}")
    ax.legend(fontsize=8)

# Coefficients to inspect more closely. With the beta_true defined in the
# first cell, indices 0 and 1 are non-zero while 2 and 3 are exactly zero.
coeffs_to_plot = [0, 1, 2, 3]
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
axes = axes.flatten()

# One panel per selected coefficient.
for panel, j in zip(axes, coeffs_to_plot):
    plot_beta_kde_for_coeff(j, panel)

fig.suptitle("Marginal posterior densities of selected β_j", fontsize=14)
fig.tight_layout()
plt.show()


In [21]:
# Posterior draws of the noise scale, global scale and local scales for the
# three shrinkage models. The Stan variable names differ between models.
_posterior_RHS = linreg_fit['Linreg Regularized Horseshoe']['posterior']
sigma_RHS, tau_RHS, lambda_RHS = (
    _posterior_RHS.stan_variable(name) for name in ("sigma", "tau", "lambda_tilde")
)

_posterior_DHS = linreg_fit['Linreg Dirichlet Horseshoe']['posterior']
sigma_DHS, tau_DHS, lambda_DHS, xi_DHS = (
    _posterior_DHS.stan_variable(name) for name in ("sigma", "tau", "lambda_data", "phi_data")
)

_posterior_DST = linreg_fit['Linreg Dirichlet Student T']['posterior']
sigma_DST, tau_DST, lambda_DST, xi_DST = (
    _posterior_DST.stan_variable(name) for name in ("sigma", "tau", "lambda_tilde", "phi_data")
)


In [29]:
# Number of posterior draws, training-set size and number of coefficients.
# NOTE(review): hard-coded; assumes every fit has 4000 draws and that the
# training set has 200 rows (matches the "linreg_N200_p10" config name) - confirm.
S, N, p = 4000, 200, 10

# Per-draw shrinkage factors kappa_j, one row per draw.
kappa_RHS = np.zeros((S, p))
kappa_DHS = np.zeros((S, p))
kappa_DST = np.zeros((S, p))

# Per-draw effective number of non-shrunk coefficients: sum_j (1 - kappa_j).
meff_RHS = np.zeros((S))
meff_DHS = np.zeros((S))
meff_DST = np.zeros((S))

for i in range(S):
    # kappa = 1 / (1 + N * sigma^-2 * tau^2 * local scale)
    kappa_RHS[i] = 1/(1+(N*sigma_RHS[i]**(-2)*tau_RHS[i]**2*lambda_RHS[i]))
    kappa_DHS[i] = 1/(1+(N*sigma_DHS[i]**(-2)*tau_DHS[i]**2*lambda_DHS[i]*xi_DHS[i]))
    # Each model must use its own global scale: the DST row previously mixed in
    # tau_DHS by mistake.
    kappa_DST[i] = 1/(1+(N*sigma_DST[i]**(-2)*tau_DST[i]**2*lambda_DST[i]*xi_DST[i]))

    meff_RHS[i] = np.sum(1 - kappa_RHS[i])
    meff_DHS[i] = np.sum(1 - kappa_DHS[i])
    meff_DST[i] = np.sum(1 - kappa_DST[i])



In [None]:
import numpy as np
import matplotlib.pyplot as plt

S = 4000  # number of draws, as before
p = 10    # number of coefficients, as before

# Coefficients to visualize, their ground-truth values and LaTeX panel titles
# (titles use 1-based numbering).
idxs = [0, 1, 2, 6]
beta_true_vals = dict(zip(idxs, (3, -2, 0, 0.8)))
titles = {j: rf"$\beta_{j + 1}$" for j in idxs}

def common_bins(*arrays, bins=40, range=None, rng=None):
    """Compute common histogram bin edges for multiple arrays.

    arrays: any number of array-likes; they are flattened and pooled
    bins: forwarded to np.histogram_bin_edges
    range / rng: optional (low, high) limits for the edges. Both spellings are
        accepted because cells in this notebook call the helper with either
        keyword; passing both at once is an error.
    Returns: 1-D array of bin edges shared by all inputs.
    """
    if range is not None and rng is not None:
        raise ValueError("pass only one of 'range' and 'rng'")
    limits = rng if range is None else range
    data = np.concatenate([np.ravel(a) for a in arrays])
    return np.histogram_bin_edges(data, bins=bins, range=limits)

# One row per selected coefficient: shrinkage factor on the left, coefficient
# on the right, with the three shrinkage models overlaid in each panel.
fig, axes = plt.subplots(len(idxs), 2, figsize=(12, 8), sharex=False, sharey="row")
fig.suptitle("Posterior distributions of shrinkage ($\\kappa$) and coefficients ($\\beta$)")

for row, j in enumerate(idxs):
    ax_kappa = axes[row, 0]
    ax_beta  = axes[row, 1]

    # --- Kappa posterior ---
    bins_kappa = common_bins(kappa_RHS[:, j], kappa_DHS[:, j], kappa_DST[:, j], bins=40, range=(0, 1.0))
    ax_kappa.hist(kappa_RHS[:, j], bins=bins_kappa, alpha=0.6, label="RHS", density=True)
    ax_kappa.hist(kappa_DHS[:, j], bins=bins_kappa, alpha=0.6, label="DHS", density=True)
    ax_kappa.hist(kappa_DST[:, j], bins=bins_kappa, alpha=0.6, label="DST", density=True)
    ax_kappa.set_xlabel(r"$\kappa$")
    ax_kappa.set_ylabel("Density" if row == 0 else "")
    ax_kappa.set_title(f"Kappa, {titles[j]}")

    # --- Beta posterior ---
    # The shared bins must span all three models; otherwise DST draws outside
    # the RHS/DHS range are dropped from its histogram.
    bins_beta = common_bins(beta_RHS[:, j], beta_DHS[:, j], beta_DST[:, j], bins=40)  # auto range
    ax_beta.hist(beta_RHS[:, j], bins=bins_beta, alpha=0.6, label="RHS", density=True)
    ax_beta.hist(beta_DHS[:, j], bins=bins_beta, alpha=0.6, label="DHS", density=True)
    ax_beta.hist(beta_DST[:, j], bins=bins_beta, alpha=0.6, label="DST", density=True)

    # Add true and GLS lines (ravel so a (P, 1) estimate indexes like a (P,) one)
    ax_beta.axvline(beta_true_vals[j], alpha=0.9, label="Beta_true", color="green", linestyle="--")
    ax_beta.axvline(np.ravel(beta_GLS)[j], alpha=0.9, label="Beta_GLS", color="red", linestyle=":")
    ax_beta.set_xlabel(r"$\beta$")
    ax_beta.set_title(f"Beta, {titles[j]}")

# Put a single legend outside; the first beta panel carries every label.
handles, labels = axes[0, 1].get_legend_handles_labels()
fig.legend(handles, labels, loc="upper right")
fig.tight_layout(rect=[0, 0, 0.85, 0.95])
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

P = 10  # number of covariates
# Bin edges at half-integers, so every bin is centred on an integer 0..P.
bins = np.arange(0, P + 2) - 0.5

plt.figure(figsize=(5, 5))

# Overlaid step histograms of the per-draw effective number of parameters.
for _meff_draws, _model_tag in ((meff_RHS, "RHS"), (meff_DHS, "DHS"), (meff_DST, "DST")):
    sns.histplot(_meff_draws, bins=bins, stat="density", element="step",
                 fill=True, label=_model_tag)

# Reference: number of non-zero coefficients in the simulated truth.
plt.axvline(4, color="black", linestyle="--", linewidth=1.5,
            label="True active = 4")

# Dotted line and rotated annotation at each model's posterior mean.
for Meff, label, color in [
    (meff_RHS, "RHS", "C0"),
    (meff_DHS, "DHS", "C1"),
    (meff_DST, "DST", "C2"),
]:
    mean_val = np.mean(Meff)
    plt.axvline(mean_val, color=color, linestyle=":", linewidth=1.5)
    annotation_height = plt.ylim()[1]*0.8
    plt.text(mean_val + 0.1, annotation_height,
             f"{label} mean={mean_val:.1f}",
             color="black", fontsize=9, rotation=90, va="top")

plt.xticks(range(0, P + 1))
plt.xlabel(r"Effective number of parameters $m_{\mathrm{eff}}$")
plt.ylabel("Posterior density")
plt.title("Posterior distribution of effective parameters")
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Coefficients to show and their LaTeX titles (1-based numbering).
idxs   = [0, 1, 2, 6]
titles = {0: r"$\beta_1$", 1: r"$\beta_2$", 2: r"$\beta_3$", 6: r"$\beta_7$"}

fig, axes = plt.subplots(len(idxs), 1, figsize=(7, 9), sharex=True)
# All three shrinkage models are drawn, so the title names all of them.
fig.suptitle(r"Joint posterior of ($\kappa_j$, $\beta_j$) for RHS, DHS and DST")

for row, j in enumerate(idxs):
    ax = axes[row]
    ax.scatter(kappa_RHS[:, j], beta_RHS[:, j],
               alpha=0.2, s=8, label="RHS")
    ax.scatter(kappa_DHS[:, j], beta_DHS[:, j],
               alpha=0.2, s=8, label="DHS")
    ax.scatter(kappa_DST[:, j], beta_DST[:, j],
               alpha=0.2, s=8, label="DST")
    # True coefficient (beta_true_vals comes from an earlier cell); black and
    # dashed so it does not blend with the default-colored RHS points.
    ax.axhline(y=beta_true_vals[j], color="black", linestyle="--", linewidth=1)

    ax.set_title(titles[j])
    ax.set_ylabel(r"$\beta_j$")
    if row == len(idxs) - 1:
        ax.set_xlabel(r"$\kappa_j$")

# One shared legend taken from the first panel.
handles, labels = axes[0].get_legend_handles_labels()
fig.legend(handles, labels, loc="upper right")
plt.tight_layout(rect=[0, 0, 0.85, 0.95])
plt.show()


In [None]:
# Posterior draws of alpha for the two Dirichlet models.
alpha_DHS = linreg_fit['Linreg Dirichlet Horseshoe']['posterior'].stan_variable("alpha")
alpha_DST = linreg_fit['Linreg Dirichlet Student T']['posterior'].stan_variable("alpha")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# alpha_DHS, alpha_DST : shape (n_draws, H)
n_draws, H = alpha_DHS.shape

# Long format: one frame per (unit, model) pair, DHS before DST for each unit.
df_list = [
    pd.DataFrame({
        "alpha": draws[:, h],
        "unit": h,
        "model": model_tag,
    })
    for h in range(H)
    for model_tag, draws in (("DHS", alpha_DHS), ("DST", alpha_DST))
]

df_alpha = pd.concat(df_list, ignore_index=True)

# Log-transformed copy, since alpha can be small.
df_alpha["log_alpha"] = np.log(df_alpha["alpha"])

plt.figure(figsize=(12, 5))

# Left panel: raw alpha on a log-scaled axis. Right panel: log(alpha) directly.
# Tuple layout: (column, title, y label, put the y axis on a log scale?)
_violin_panels = (
    ("alpha", "Posterior of α per unit (log scale)", "α", True),
    ("log_alpha", "Posterior of log(α) per unit", "log α", False),
)

for _position, (_column, _panel_title, _y_label, _log_axis) in enumerate(_violin_panels, start=1):
    plt.subplot(1, 2, _position)
    sns.violinplot(
        data=df_alpha, x="unit", y=_column, hue="model",
        cut=0, inner="quartile", scale="width"
    )
    if _log_axis:
        plt.yscale("log")
    plt.title(_panel_title)
    plt.xlabel("Coefficient")
    plt.ylabel(_y_label)
    plt.legend(title="Model")

plt.tight_layout()
plt.show()


## Priors

In [29]:
# The fits stored under results_dir_linreg_prior are only needed from here on.
# Load them on demand so this section also runs on a fresh kernel, where
# `prior_linreg_fit` has not been created by an earlier cell.
if "prior_linreg_fit" not in globals():
    prior_linreg_fit = get_model_fits(
        config=full_config_path,
        results_dir=results_dir_linreg_prior,
        models=model_names,
        include_prior=False,
    )

# Draws of the noise scale, global scale and local scales from the prior fits.
sigma_RHS_prior = prior_linreg_fit['Linreg Regularized Horseshoe']['posterior'].stan_variable("sigma")
tau_RHS_prior = prior_linreg_fit['Linreg Regularized Horseshoe']['posterior'].stan_variable("tau")
lambda_RHS_prior = prior_linreg_fit['Linreg Regularized Horseshoe']['posterior'].stan_variable("lambda_tilde")

sigma_DHS_prior = prior_linreg_fit['Linreg Dirichlet Horseshoe']['posterior'].stan_variable("sigma")
tau_DHS_prior = prior_linreg_fit['Linreg Dirichlet Horseshoe']['posterior'].stan_variable("tau")
lambda_DHS_prior = prior_linreg_fit['Linreg Dirichlet Horseshoe']['posterior'].stan_variable("lambda_data")
xi_DHS_prior = prior_linreg_fit['Linreg Dirichlet Horseshoe']['posterior'].stan_variable("phi_data")


In [30]:
# Number of draws and number of coefficients (hard-coded).
S, p = 4000, 10

# Per-draw shrinkage factors under the prior, one row per draw.
kappa_RHS_prior = np.zeros((S, p))
kappa_DHS_prior = np.zeros_like(kappa_RHS_prior)

# NOTE(review): this uses sigma**2 and no sample-size factor, whereas the
# posterior kappa cell uses N * sigma**(-2). Confirm which form is intended
# before comparing prior and posterior kappa directly.
for i in range(S):
    _rhs_term = sigma_RHS_prior[i]**2*tau_RHS_prior[i]**2*lambda_RHS_prior[i]
    _dhs_term = sigma_DHS_prior[i]**2*tau_DHS_prior[i]**2*lambda_DHS_prior[i]*xi_DHS_prior[i]
    kappa_RHS_prior[i] = 1/(1+(_rhs_term))
    kappa_DHS_prior[i] = 1/(1+(_dhs_term))



In [None]:
# One panel per selected coefficient (idxs / titles come from an earlier cell).
fig, axes = plt.subplots(len(idxs), 1, figsize=(8, 7), sharex=True, sharey=True)
fig.suptitle("Prior distributions of shrinkage ($\\kappa$)")

_prior_kappas = (("RHS", kappa_RHS_prior), ("DHS", kappa_DHS_prior))

for ax, j in zip(axes, idxs):
    columns_j = [(model_tag, draws[:, j]) for model_tag, draws in _prior_kappas]

    # Shared edges on [0, 0.3] so both histograms are directly comparable.
    bins_kappa_prior = common_bins(
        *(column for _, column in columns_j),
        bins=40,
        range=(0, 0.3)
    )

    for model_tag, column in columns_j:
        ax.hist(column, bins=bins_kappa_prior,
                alpha=0.6, label=model_tag, density=True)

    # Zoom in on kappa in [0, 0.2].
    # NOTE(review): with kappa = 1 / (1 + ...), values near 0 mean little
    # shrinkage - confirm this is the region of interest.
    ax.set_xlim(0, 0.2)
    ax.set_ylabel("Density")
    ax.set_title(f"Prior kappa, {titles[j]}")

axes[-1].set_xlabel(r"$\kappa$")
handles, labels = axes[0].get_legend_handles_labels()
fig.legend(handles, labels, loc="upper right")
fig.tight_layout(rect=[0, 0, 0.85, 0.95])
plt.show()


In [None]:
models = ["RHS", "DHS"]

# squeeze=False keeps `axes` two-dimensional even for a single row, so both
# the loop and the legend lookup below can always index it as axes[row, col].
fig, axes = plt.subplots(len(idxs), len(models),
                         figsize=(10, 8),
                         sharex=True, sharey=True,
                         squeeze=False)
fig.suptitle("Prior vs posterior shrinkage ($\\kappa$)")

# (prior draws, posterior draws) of kappa for each model shown.
_kappa_draws = {
    "RHS": (kappa_RHS_prior, kappa_RHS),
    "DHS": (kappa_DHS_prior, kappa_DHS),
}

for row, j in enumerate(idxs):
    for col, model in enumerate(models):
        ax = axes[row, col]

        prior_all, post_all = _kappa_draws[model]
        prior = prior_all[:, j]
        post  = post_all[:, j]

        # Common bins for fair comparison
        bins_kappa = common_bins(prior, post, bins=40, range=(0, 1.0))

        ax.hist(prior, bins=bins_kappa, alpha=0.5, density=True, label="Prior")
        ax.hist(post,  bins=bins_kappa, alpha=0.5, density=True, label="Posterior")

        ax.set_xlim(0, 1.0)
        if row == len(idxs) - 1:
            ax.set_xlabel(r"$\kappa$")
        if col == 0:
            ax.set_ylabel("Density")

        ax.set_title(f"{model}, {titles[j]}")

# One shared legend
handles, labels = axes[0, 0].get_legend_handles_labels()
fig.legend(handles, labels, loc="upper right")
fig.tight_layout(rect=[0, 0, 0.85, 0.95])
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Dirichlet weights of the DHS model under the prior and the posterior.
phi_prior = xi_DHS_prior  # (S, p)
phi_post  = xi_DHS        # (S, p)

p = phi_prior.shape[1]
j_idx = np.arange(p)

# Column means: average weight of each predictor.
prior_mean = phi_prior.mean(axis=0)
post_mean  = phi_post.mean(axis=0)

width = 0.4

plt.figure(figsize=(10, 4))
# Side-by-side bars: prior shifted left, posterior shifted right.
for _offset, _heights, _legend in (
    (-width/2, prior_mean, "Prior"),
    (width/2, post_mean, "Posterior"),
):
    plt.bar(j_idx + _offset, _heights, width=width, label=_legend, alpha=0.7)

plt.xlabel("Predictor index j")
plt.ylabel(r"Mean $\xi_j$")
plt.title(r"Dirichlet weights $\xi_j$: prior vs posterior")
plt.xticks(j_idx)
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# Predictor indices to show and their LaTeX labels (1-based numbering).
idxs = [0, 1, 2]
titles = {j: rf"$\beta_{j + 1}$" for j in idxs}

def common_bins(*arrays, bins=40, rng=None, range=None):
    """Compute common histogram bin edges for multiple arrays.

    arrays: any number of array-likes; they are flattened and pooled
    bins: forwarded to np.histogram_bin_edges
    rng / range: optional (low, high) limits for the edges. Both spellings are
        accepted because cells in this notebook call the helper with either
        keyword, so re-running an earlier cell after this definition still
        works; passing both at once is an error.
    Returns: 1-D array of bin edges shared by all inputs.
    """
    if rng is not None and range is not None:
        raise ValueError("pass only one of 'rng' and 'range'")
    limits = range if rng is None else rng
    data = np.concatenate([np.ravel(a) for a in arrays])
    return np.histogram_bin_edges(data, bins=bins, range=limits)

# One panel per selected predictor, prior and posterior weights overlaid.
fig, axes = plt.subplots(len(idxs), 1, figsize=(8, 8), sharex=True, sharey=True)
fig.suptitle(r"Prior vs posterior distributions of $\xi_j$ (DHS)")

for ax, j in zip(axes, idxs):
    prior_j, post_j = phi_prior[:, j], phi_post[:, j]

    # Shared edges on [0, 1] so both histograms are directly comparable.
    bins = common_bins(prior_j, post_j, bins=40, rng=(0, 1))

    for _draws, _legend in ((prior_j, "Prior"), (post_j, "Posterior")):
        ax.hist(_draws, bins=bins, alpha=0.5, density=True, label=_legend)

    ax.set_xlim(0, 1)
    ax.set_ylabel("Density")
    ax.set_title(fr"$\xi_{{{j+1}}}$ ({titles.get(j, f'j={j}')})")

axes[-1].set_xlabel(r"$\xi_j$")
handles, labels = axes[0].get_legend_handles_labels()
fig.legend(handles, labels, loc="upper right")
fig.tight_layout(rect=[0, 0, 0.85, 0.95])
plt.show()


## Effective scales

In [13]:
# Coefficient draws and their per-coefficient prior scales ("beta_sd") from the
# centered parameterisations.
# NOTE(review): these two model labels are not in `model_names` as passed to
# get_model_fits in this notebook - confirm that `linreg_fit` contains them.
# NOTE(review): this rebinds beta_RHS / beta_DHS, so earlier cells re-run after
# this one would plot the centered fits instead.
_centered_RHS = linreg_fit['Linreg Regularized Horseshoe Centered']['posterior']
beta_RHS, beta_sd_RHS = (
    _centered_RHS.stan_variable(name) for name in ("beta", "beta_sd")
)

_centered_DHS = linreg_fit['Linreg Dirichlet Horseshoe Centered']['posterior']
beta_DHS, beta_sd_DHS = (
    _centered_DHS.stan_variable(name) for name in ("beta", "beta_sd")
)


In [None]:
eps = 1e-12  # guards the logarithm against exact zeros
idxs   = [0, 1, 2, 7]  # whatever you like
titles = {0: r"$\beta_1$", 1: r"$\beta_2$", 2: r"$\beta_3$", 7: r"$\beta_8$"}

fig, axes = plt.subplots(len(idxs), 2, figsize=(10, 10), sharex=True, sharey='row')
fig.suptitle(r"Posterior scales: $\log \beta_{\text{sd},j}$ for RHS vs DHS")

for row, j in enumerate(idxs):
    # RHS
    ax_rhs = axes[row, 0]
    # The title and axis labels announce the log scale, so the transform has to
    # be applied here; it was missing and the raw scales were plotted.
    log_sd_rhs = np.log(beta_sd_RHS[:, j] + eps)
    ax_rhs.hist(log_sd_rhs, bins=40, density=True, alpha=0.7)
    ax_rhs.set_title(f"RHS, {titles[j]}")
    ax_rhs.set_xlabel(r"$\log \beta_{\text{sd},j}$")
    ax_rhs.set_ylabel("Density")

    # DHS
    ax_dhs = axes[row, 1]
    log_sd_dhs = np.log(beta_sd_DHS[:, j] + eps)
    ax_dhs.hist(log_sd_dhs, bins=40, density=True, alpha=0.7, color="tab:orange")
    ax_dhs.set_title(f"DHS, {titles[j]}")
    ax_dhs.set_xlabel(r"$\log \beta_{\text{sd},j}$")

plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()


In [None]:
# One row per selected coefficient, RHS on the left and DHS on the right.
fig, axes = plt.subplots(len(idxs), 2, figsize=(10, 10), sharex=True, sharey='row')
fig.suptitle(r"Joint posterior: $(\log \beta_{\text{sd},j}, \beta_j)$")

# (label, scale draws, coefficient draws, extra scatter options) per column.
_scatter_columns = (
    ("RHS", beta_sd_RHS, beta_RHS, {}),
    ("DHS", beta_sd_DHS, beta_DHS, {"color": "tab:orange"}),
)

for row, j in enumerate(idxs):
    for col, (_tag, _scale_draws, _coef_draws, _extra) in enumerate(_scatter_columns):
        _panel = axes[row, col]
        _log_sd = np.log(_scale_draws[:, j] + eps)
        _panel.scatter(_log_sd, _coef_draws[:, j], alpha=0.2, s=8, **_extra)
        _panel.set_title(f"{_tag}, {titles[j]}")
        _panel.set_xlabel(r"$\log \beta_{\text{sd},j}$")
        # Only the left column carries the y label.
        if col == 0:
            _panel.set_ylabel(r"$\beta_j$")

plt.tight_layout(rect=[0, 0, 1, 0.95])
plt.show()


In [None]:
# Shrinkage factor computed from the coefficient scale alone (sigma ignored).
# NOTE(review): this rebinds kappa_RHS / kappa_DHS, which earlier cells define
# differently; re-running those plots afterwards would use these values.
kappa_RHS, kappa_DHS = (
    1 / (1 + scale_draws**2) for scale_draws in (beta_sd_RHS, beta_sd_DHS)
)

fig, axes = plt.subplots(len(idxs), 1, figsize=(7, 9), sharex=True)
fig.suptitle(r"Joint posterior of $(\kappa_j, \beta_j)$ for RHS vs DHS")

_last_row = len(idxs) - 1
for row, j in enumerate(idxs):
    ax = axes[row]
    for _tag, _kappa, _coef in (("RHS", kappa_RHS, beta_RHS), ("DHS", kappa_DHS, beta_DHS)):
        ax.scatter(_kappa[:, j], _coef[:, j], alpha=0.2, s=8, label=_tag)
    ax.set_title(titles[j])
    ax.set_ylabel(r"$\beta_j$")
    # Only the bottom panel carries the shared x label.
    if row == _last_row:
        ax.set_xlabel(r"$\kappa_j$")

handles, labels = axes[0].get_legend_handles_labels()
fig.legend(handles, labels, loc="upper right")
plt.tight_layout(rect=[0, 0, 0.85, 0.95])
plt.show()

