In [None]:
import sys, os; sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__) if '__file__' in globals() else os.getcwd(), '..')))
#import os; os.chdir(os.path.dirname(os.getcwd()))
from utils.model_loader import get_model_fits
import numpy as np
import pandas as pd
import re
from sklearn.metrics import mean_squared_error
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
# --- Uncorrelated Friedman data: dataset folder and matching results folder ---
data_dir = "datasets/friedman/many"
results_dir_tanh = "results/regression/single_layer/tanh/friedman"

# Base models and their "L2" variants (the L2 names are evaluated later with layers=2).
model_names_tanh = [
    "Dirichlet Horseshoe tanh",
    "Dirichlet Student T tanh",
    "Beta Horseshoe tanh",
    "Beta Student T tanh",
]
model_names_tanh_L2 = [
    "Dirichlet Horseshoe tanh L2",
    "Dirichlet Student T tanh L2",
    "Beta Horseshoe tanh L2",
    "Beta Student T tanh L2",
]

# dataset config name -> whatever get_model_fits returns for that dataset
tanh_fits, tanh_fits_L2 = {}, {}

files = sorted(filter(lambda entry: entry.endswith(".npz"), os.listdir(data_dir)))
for fname in files:
    # The config name is the dataset file name with ".npz" removed.
    base_config_name = fname.replace(".npz", "")
    full_config_path = base_config_name  # no sub-folder prefix is added here

    # Same dataset and results folder, two different model-name lists.
    tanh_fit, tanh_fit_L2 = (
        get_model_fits(
            config=full_config_path,
            results_dir=results_dir_tanh,
            models=names,
            include_prior=False,
        )
        for names in (model_names_tanh, model_names_tanh_L2)
    )

    tanh_fits[base_config_name] = tanh_fit
    tanh_fits_L2[base_config_name] = tanh_fit_L2
    


In [None]:
# --- Correlated Friedman data: dataset folder and matching results folder ---
# NOTE: this rebinds `data_dir` and `results_dir_tanh` set by the uncorrelated cell.
data_dir = "datasets/friedman_correlated/many"
results_dir_tanh = "results/regression/single_layer/tanh/friedman_correlated"

# Same model names as for the uncorrelated data, kept under separate variables.
model_names_tanh_corr = [
    "Dirichlet Horseshoe tanh",
    "Dirichlet Student T tanh",
    "Beta Horseshoe tanh",
    "Beta Student T tanh",
]
model_names_tanh_L2_corr = [
    "Dirichlet Horseshoe tanh L2",
    "Dirichlet Student T tanh L2",
    "Beta Horseshoe tanh L2",
    "Beta Student T tanh L2",
]

# dataset config name -> whatever get_model_fits returns for that dataset
tanh_fits_corr, tanh_fits_L2_corr = {}, {}

files = sorted(filter(lambda entry: entry.endswith(".npz"), os.listdir(data_dir)))
for fname in files:
    # The config name is the dataset file name with ".npz" removed.
    base_config_name = fname.replace(".npz", "")
    full_config_path = base_config_name  # no sub-folder prefix is added here

    # Same dataset and results folder, two different model-name lists.
    tanh_fit_corr, tanh_fit_L2_corr = (
        get_model_fits(
            config=full_config_path,
            results_dir=results_dir_tanh,
            models=names,
            include_prior=False,
        )
        for names in (model_names_tanh_corr, model_names_tanh_L2_corr)
    )

    tanh_fits_corr[base_config_name] = tanh_fit_corr
    tanh_fits_L2_corr[base_config_name] = tanh_fit_L2_corr
    


In [4]:
import re
import numpy as np
import pandas as pd
from properscoring import crps_ensemble
from scores.probability import crps_for_ensemble

# Captures N, sigma and seed from a dataset key; the p-field is matched but not captured.
_FRIEDMAN_KEY = re.compile(r"Friedman_N(\d+)_p\d+_sigma([\d.]+)_seed(\d+)")

def extract_friedman_metadata(key: str):
    """
    Pull (N, sigma, seed) out of a key such as 'Friedman_N100_p10_sigma1.00_seed3'.

    The pattern is searched anywhere in `key`, and any integer is accepted
    after 'p'. Returns (N:int, sigma:float, seed:int), or (None, None, None)
    when `key` does not contain the pattern.
    """
    match = _FRIEDMAN_KEY.search(key)
    if match is None:
        return None, None, None

    n_text, sigma_text, seed_text = match.groups()
    return int(n_text), float(sigma_text), int(seed_text)


In [12]:
def compute_rmse_from_fits_v2(all_fits, model_names=None, folder="friedman"):
    """
    Compute test-set RMSE for every dataset/model pair in `all_fits`.

    For each dataset key that parses as a Friedman key, the matching .npz file
    is loaded and, for each requested model, the function computes:
      - RMSE for each posterior draw (standardized scale)
      - RMSE for each posterior draw (original scale)
      - RMSE of the posterior mean predictor (standardized scale)
      - RMSE of the posterior mean predictor (original scale)

    Parameters:
        all_fits: dict mapping dataset key -> {model name -> entry}; an entry
            is used only if it is truthy and has a "posterior" item exposing
            `stan_variable("output_test")`.
        model_names: models to evaluate; if None/empty, all models found for
            the dataset are evaluated.
        folder: sub-folder of `datasets/` holding the .npz files. The file is
            looked up directly in that folder first, then in its `many/`
            sub-folder.

    Returns:
        df_rmse: long DF with one row per posterior draw.
        df_posterior_rmse: one row per model/dataset with posterior-mean RMSE.

    Raises:
        FileNotFoundError: if the dataset file is in neither location.
        ValueError: if `output_test` is neither (S, N_test) nor (S, N_test, 1).
    """
    rmse_rows = []
    post_mean_rows = []

    for dataset_key, model_dict in all_fits.items():
        N, sigma, seed = extract_friedman_metadata(dataset_key)
        if N is None:
            continue  # not a Friedman dataset key

        fname = f"Friedman_N{N}_p10_sigma{sigma:.2f}_seed{seed}.npz"
        try:
            data = np.load(f"datasets/{folder}/{fname}")
        except FileNotFoundError:
            data = np.load(f"datasets/{folder}/many/{fname}")

        # Close the archive as soon as the needed arrays have been read.
        with data:
            y_test = np.asarray(data["y_test"]).squeeze()  # (N_test,)
            # Scalars may be stored as arrays like shape (1,); make them plain floats.
            y_std = float(np.asarray(data["y_std"]).squeeze())
            # `y_mean` was previously never loaded, which made this function raise
            # on first use. The shift cancels in every error computed below
            # (predictions and targets get the same offset), so 0.0 is a safe
            # fallback when the archive has no `y_mean` entry.
            if "y_mean" in data.files:
                y_mean = float(np.asarray(data["y_mean"]).squeeze())
            else:
                y_mean = 0.0

        # Original-scale y_test
        y_test_orig = y_test * y_std + y_mean

        models_to_eval = model_names or list(model_dict.keys())

        for model in models_to_eval:
            entry = model_dict.get(model, None)
            if not entry or "posterior" not in entry:
                print(f"[SKIP] Missing posterior: {dataset_key} -> {model}")
                continue

            fit = entry["posterior"]

            output_test = fit.stan_variable("output_test")
            if output_test.ndim == 3 and output_test.shape[-1] == 1:
                preds = output_test[..., 0]  # (S, N_test)
            elif output_test.ndim == 2:
                preds = output_test  # (S, N_test)
            else:
                raise ValueError(
                    f"Unexpected output_test shape {output_test.shape} for {dataset_key} -> {model}"
                )

            # ---- Standardized-scale RMSE per draw ----
            sq_err = (preds - y_test[None, :]) ** 2
            rmse_per_sample = np.sqrt(np.mean(sq_err, axis=1))  # (S,)

            # ---- Original-scale RMSE per draw ----
            preds_orig = preds * y_std + y_mean
            sq_err_orig = (preds_orig - y_test_orig[None, :]) ** 2
            rmse_per_sample_orig = np.sqrt(np.mean(sq_err_orig, axis=1))  # (S,)

            for s_idx, (rmse, rmse_orig) in enumerate(zip(rmse_per_sample, rmse_per_sample_orig)):
                rmse_rows.append({
                    "dataset_key": dataset_key,
                    "model": model,
                    "N": N,
                    "sigma": sigma,
                    "seed": seed,
                    "sample_idx": s_idx,
                    "rmse": float(rmse),                 # standardized
                    "rmse_orig": float(rmse_orig),       # original scale
                })

            # Posterior-mean RMSE (standardized)
            posterior_mean = preds.mean(axis=0)  # (N_test,)
            post_mean_rmse = float(np.sqrt(np.mean((posterior_mean - y_test) ** 2)))

            # Posterior-mean RMSE (original)
            posterior_mean_orig = posterior_mean * y_std + y_mean
            post_mean_rmse_orig = float(np.sqrt(np.mean((posterior_mean_orig - y_test_orig) ** 2)))

            post_mean_rows.append({
                "dataset_key": dataset_key,
                "model": model,
                "N": N,
                "sigma": sigma,
                "seed": seed,
                "posterior_mean_rmse": post_mean_rmse,                 # standardized
                "posterior_mean_rmse_orig": post_mean_rmse_orig,       # original scale
            })

    df_rmse = pd.DataFrame(rmse_rows)
    df_posterior_rmse = pd.DataFrame(post_mean_rows)
    return df_rmse, df_posterior_rmse


In [None]:
# One-hidden-layer tanh models: uncorrelated data, then correlated data
df_rmse_tanh, df_posterior_rmse_tanh = compute_rmse_from_fits_v2(
    all_fits=tanh_fits,
    model_names=model_names_tanh,
)
df_rmse_tanh_correlated, df_posterior_rmse_tanh_correlated = compute_rmse_from_fits_v2(
    all_fits=tanh_fits_corr,
    model_names=model_names_tanh,
    folder="friedman_correlated",
)

# "L2" tanh models: uncorrelated data, then correlated data
df_rmse_tanh_L2, df_posterior_rmse_tanh_L2 = compute_rmse_from_fits_v2(
    all_fits=tanh_fits_L2,
    model_names=model_names_tanh_L2,
)
df_rmse_tanh_correlated_L2, df_posterior_rmse_tanh_correlated_L2 = compute_rmse_from_fits_v2(
    all_fits=tanh_fits_L2_corr,
    model_names=model_names_tanh_L2,
    folder="friedman_correlated",
)


In [None]:
# (short label, base model name) in the order the report lists them
_PRIOR_ABBREVIATIONS = (
    ("BHS", "Beta Horseshoe tanh"),
    ("DHS", "Dirichlet Horseshoe tanh"),
    ("BST", "Beta Student T tanh"),
    ("DST", "Dirichlet Student T tanh"),
)


def _print_mean_rmse(frame, depth_tag, name_suffix=""):
    """Print, per model, the mean original-scale posterior-mean RMSE over the rows of `frame`."""
    for short, model_name in _PRIOR_ABBREVIATIONS:
        selected = frame[frame['model'] == model_name + name_suffix]
        print(f"{short} {depth_tag}: ", selected['posterior_mean_rmse_orig'].mean(), "\n")


print("UNCORRELATED: \n")
_print_mean_rmse(df_posterior_rmse_tanh, "L1")
_print_mean_rmse(df_posterior_rmse_tanh_L2, "L2", " L2")

print("CORRELATED: \n")
_print_mean_rmse(df_posterior_rmse_tanh_correlated, "L1")
_print_mean_rmse(df_posterior_rmse_tanh_correlated_L2, "L2", " L2")


In [9]:
import pandas as pd
# Per-draw RMSE frames, each tagged with its activation label and data setting.
df1, df2, df3, df4 = (
    frame.assign(activation=activation, setting=setting)
    for frame, activation, setting in (
        (df_rmse_tanh, "Tanh", "Original"),
        (df_rmse_tanh_L2, "Tanh L2", "Original"),
        (df_rmse_tanh_correlated, "Tanh", "Correlated"),
        (df_rmse_tanh_correlated_L2, "Tanh L2", "Correlated"),
    )
)
df_all = pd.concat([df1, df2, df3, df4], ignore_index=True)

# Posterior-mean RMSE frames, tagged the same way.
df1_pm, df2_pm, df3_pm, df4_pm = (
    frame.assign(activation=activation, setting=setting)
    for frame, activation, setting in (
        (df_posterior_rmse_tanh, "Tanh", "Original"),
        (df_posterior_rmse_tanh_L2, "Tanh L2", "Original"),
        (df_posterior_rmse_tanh_correlated, "Tanh", "Correlated"),
        (df_posterior_rmse_tanh_correlated_L2, "Tanh L2", "Correlated"),
    )
)
df_all_pm = pd.concat([df1_pm, df2_pm, df3_pm, df4_pm], ignore_index=True)


In [15]:
# --- per-draw RMSE: work on a copy so df_all itself is left untouched ---
df = df_all.copy()

# Short display labels; the " L2" entries mirror the four base entries.
_abbr_base = {
    "Dirichlet Horseshoe": "DHS",
    "Dirichlet Student T": "DST",
    "Beta Horseshoe": "BHS",
    "Beta Student T": "BST",
}
abbr = {
    **_abbr_base,
    **{f"{name} L2": f"{short} L2" for name, short in _abbr_base.items()},
}

# Reduce every model name to its prior family by dropping " tanh" and " L2";
# the L2 distinction is still carried by the `activation` column.
df["model_clean"] = (
    df["model"]
    .str.replace(" tanh", "", regex=False)
    .str.replace(" L2", "", regex=False)
)

# Mean and standard deviation of the standardized per-draw RMSE per cell.
_rmse_by_cell = df.groupby(["setting", "model_clean", "activation"], as_index=False)["rmse"]
summary_df = _rmse_by_cell.agg(mean="mean", std="std")

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

# --- what is drawn, and in which order ---
settings = ["Original", "Correlated"]
models = ["Dirichlet Horseshoe", "Dirichlet Student T", "Beta Horseshoe", "Beta Student T"]
models_abbr = ["DHS", "DST", "BHS", "BST"]
activations = ["Tanh", "Tanh L2"]

# --- marker shape and horizontal nudge per activation label ---
markers = {"Tanh": "o", "Tanh L2": "^"}
act_offsets = {"Tanh": -0.12, "Tanh L2": +0.12}

# one tab10 colour per model
palette_list = plt.get_cmap("tab10").colors
palette = dict(zip(models, palette_list))

# integer x slot per model
xbase = {name: slot for slot, name in enumerate(models)}

fig, axes = plt.subplots(1, len(settings), figsize=(14, 4), sharey=True)

for ax, setting in zip(axes, settings):
    in_setting = summary_df["setting"] == setting
    for m in models:
        for act in activations:
            g = summary_df[
                in_setting
                & (summary_df["model_clean"] == m)
                & (summary_df["activation"] == act)
            ]

            # draw only the combinations that exist in the summary
            if not g.empty:
                x = xbase[m] + act_offsets[act]
                ax.errorbar(
                    x,
                    float(g["mean"].iloc[0]),
                    yerr=float(g["std"].iloc[0]),
                    fmt=markers[act],
                    markersize=10,
                    linestyle="none",
                    capsize=3,
                    color=palette[m],
                    markeredgecolor="black",
                )

    ax.set_title(setting, fontsize=15)
    ax.set_xticks(range(len(models)))
    ax.set_xticklabels(models_abbr, rotation=20, ha="right", fontsize=12)
    ax.grid(True, alpha=0.3)

axes[0].set_ylabel("RMSE", fontsize=15)

# --- legend entries: colour encodes the model, marker encodes the activation ---
model_handles = []
for m in models:
    model_handles.append(
        Line2D([0], [0], marker="o", linestyle="none",
               color=palette[m], markeredgecolor="black",
               markersize=10, label=abbr[m])
    )
activation_handles = [
    Line2D([0], [0], marker=markers[name], linestyle="none",
           color="black", markersize=10, label=name)
    for name in ("Tanh", "Tanh L2")
]

axes[-1].legend(
    handles=model_handles + activation_handles,
    loc="upper right",
    frameon=False,
    fontsize=11,
)

plt.tight_layout()
plt.show()


In [23]:
# --- posterior-mean RMSE: work on a copy so df_all_pm itself is left untouched ---
df = df_all_pm.copy()

# Short display labels; the " L2" entries mirror the four base entries.
_abbr_base = {
    "Dirichlet Horseshoe": "DHS",
    "Dirichlet Student T": "DST",
    "Beta Horseshoe": "BHS",
    "Beta Student T": "BST",
}
abbr = {
    **_abbr_base,
    **{f"{name} L2": f"{short} L2" for name, short in _abbr_base.items()},
}

# Reduce every model name to its prior family by dropping " tanh" and " L2";
# the L2 distinction is still carried by the `activation` column.
df["model_clean"] = (
    df["model"]
    .str.replace(" tanh", "", regex=False)
    .str.replace(" L2", "", regex=False)
)

# Mean and standard deviation of the standardized posterior-mean RMSE per cell.
_pm_rmse_by_cell = df.groupby(["setting", "model_clean", "activation"], as_index=False)["posterior_mean_rmse"]
summary_df = _pm_rmse_by_cell.agg(mean="mean", std="std")

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D

# --- what is drawn, and in which order ---
settings = ["Original", "Correlated"]
models = ["Dirichlet Horseshoe", "Dirichlet Student T", "Beta Horseshoe", "Beta Student T"]
models_abbr = ["DHS", "DST", "BHS", "BST"]
activations = ["Tanh", "Tanh L2"]

# --- marker shape and horizontal nudge per activation label ---
markers = {"Tanh": "o", "Tanh L2": "^"}
act_offsets = {"Tanh": -0.12, "Tanh L2": +0.12}

# one tab10 colour per model
palette_list = plt.get_cmap("tab10").colors
palette = dict(zip(models, palette_list))

# integer x slot per model
xbase = {name: slot for slot, name in enumerate(models)}

fig, axes = plt.subplots(1, len(settings), figsize=(14, 4), sharey=True)

for ax, setting in zip(axes, settings):
    in_setting = summary_df["setting"] == setting
    for m in models:
        for act in activations:
            g = summary_df[
                in_setting
                & (summary_df["model_clean"] == m)
                & (summary_df["activation"] == act)
            ]

            # draw only the combinations that exist in the summary
            if not g.empty:
                x = xbase[m] + act_offsets[act]
                ax.errorbar(
                    x,
                    float(g["mean"].iloc[0]),
                    yerr=float(g["std"].iloc[0]),
                    fmt=markers[act],
                    markersize=10,
                    linestyle="none",
                    capsize=3,
                    color=palette[m],
                    markeredgecolor="black",
                )

    ax.set_title(setting, fontsize=15)
    ax.set_xticks(range(len(models)))
    ax.set_xticklabels(models_abbr, rotation=20, ha="right", fontsize=12)
    ax.grid(True, alpha=0.3)

axes[0].set_ylabel("Posterior mean RMSE", fontsize=15)

# --- legend entries: colour encodes the model, marker encodes the activation ---
model_handles = []
for m in models:
    model_handles.append(
        Line2D([0], [0], marker="o", linestyle="none",
               color=palette[m], markeredgecolor="black",
               markersize=10, label=abbr[m])
    )
activation_handles = [
    Line2D([0], [0], marker=markers[name], linestyle="none",
           color="black", markersize=10, label=name)
    for name in ("Tanh", "Tanh L2")
]

axes[-1].legend(
    handles=model_handles + activation_handles,
    loc="upper right",
    frameon=False,
    fontsize=11,
)

plt.tight_layout()
plt.show()


In [87]:
from utils.sparsity import forward_pass_tanh, local_prune_weights

def local_prune_weights(weights, sparsity_level, index_to_prune=0):
    """
    Magnitude-prune exactly one weight matrix in a list, selected by index.

    The `floor(sparsity_level * size)` entries of smallest absolute value in
    `weights[index_to_prune]` get mask value 0; all other entries, and every
    other matrix, get mask value 1.

    NOTE: this definition shadows the `local_prune_weights` name imported from
    `utils.sparsity` at the top of this cell.

    Parameters:
    - weights: list of np.ndarray (e.g., [W1, W2])
    - sparsity_level: fraction of weights to prune (0.0 to 1.0, both inclusive)
    - index_to_prune: which weight matrix to prune in the list

    Returns:
    - list of masks (one for each weight matrix)

    Raises:
    - ValueError: if `sparsity_level` is outside [0.0, 1.0]
    """
    if not 0.0 <= sparsity_level <= 1.0:
        raise ValueError(
            f"sparsity_level must be in [0.0, 1.0], got {sparsity_level}"
        )

    masks = [np.ones_like(W) for W in weights]

    W = weights[index_to_prune]
    flat = np.abs(W).ravel()
    num_to_prune = int(np.floor(sparsity_level * flat.size))

    if num_to_prune >= flat.size and flat.size > 0:
        # Everything is pruned. np.argpartition rejects kth == size, so this
        # case (sparsity_level == 1.0) is handled without partitioning.
        masks[index_to_prune] = np.zeros(W.shape, dtype=float)
    elif num_to_prune > 0:
        # After partitioning around position k, the first k indices are the
        # k smallest magnitudes (in arbitrary order).
        idx = np.argpartition(flat, num_to_prune)[:num_to_prune]
        mask_flat = np.ones_like(flat, dtype=bool)
        mask_flat[idx] = False
        masks[index_to_prune] = mask_flat.reshape(W.shape).astype(float)

    return masks


def forward_pass_tanh_L2(X, W1, b1, W2, b2, WL, bL):
    """
    Forward pass through two tanh hidden layers followed by a linear output layer.

    Each bias is reshaped to a single row so it broadcasts over the rows of X.
    Returns tanh(tanh(X W1 + b1) W2 + b2) WL + bL.
    """
    hidden_1 = np.tanh(np.matmul(X, W1) + np.reshape(b1, (1, -1)))
    hidden_2 = np.tanh(np.matmul(hidden_1, W2) + np.reshape(b2, (1, -1)))
    return np.matmul(hidden_2, WL) + np.reshape(bL, (1, -1))


def compute_sparse_rmse_results(seeds, models, all_fits, get_N_sigma, forward_pass, folder, layers=1,
                         sparsity=0.0, prune_fn=None):
    """
    Re-evaluate posterior draws on the test set after (optionally) pruning weights.

    For every seed and model, each posterior draw's weights are pushed through
    `forward_pass` on `X_test`, after masking `W_1` (and `W_2` when
    `layers == 2`) with the masks returned by `prune_fn`. The output layer
    `W_L` is never masked.

    Parameters:
        seeds: iterable of dataset seeds.
        models: model names to look up in `all_fits[dataset_key]`.
        all_fits: dict dataset key -> {model name -> {"posterior": fit}}, where
            `fit.stan_variable(name)` returns the draws for "W_1", "W_L",
            "hidden_bias", "output_bias" (and "W_2" when `layers == 2`).
        get_N_sigma: zero-argument callable returning (N, sigma).
        forward_pass: callable taking (X, W1, b1, WL, bL) when `layers == 1`,
            or (X, W1, b1, W2, b2, WL, bL) when `layers == 2`.
        folder: sub-folder of `datasets/` that holds the .npz files.
        layers: number of hidden layers, 1 or 2.
        sparsity: fraction handed to `prune_fn`; pruning is skipped at 0.0.
        prune_fn: callable `(weights, sparsity, index_to_prune=0) -> masks`,
            or None to disable pruning.

    Returns:
        df_rmse: one row per (seed, model, posterior draw) with column `rmse`.
        df_posterior_rmse: one row per (seed, model) with `posterior_mean_rmse`.

    Raises:
        ValueError: if `layers` is not 1 or 2.

    NOTE(review): the dataset key is formatted with `sigma:.1f`; confirm this
    matches the keys of `all_fits` and the file names in `folder`, since
    non-matching combinations are only reported as [SKIP].
    """
    if layers not in (1, 2):
        raise ValueError(f"layers must be 1 or 2, got {layers}")

    results = []
    posterior_means = []
    do_prune = prune_fn is not None and sparsity > 0.0

    for seed in seeds:
        N, sigma = get_N_sigma()
        dataset_key = f'Friedman_N{N}_p10_sigma{sigma:.1f}_seed{seed}'
        path = f"datasets/{folder}/{dataset_key}.npz"

        try:
            # Close the archive as soon as the arrays have been read.
            with np.load(path) as data:
                X_test = data["X_test"]
                # Flatten the targets once. Subtracting a (N_test, 1) target from
                # a (N_test,) prediction would silently broadcast to
                # (N_test, N_test) and yield a wrong RMSE.
                y_test = np.asarray(data["y_test"]).reshape(-1)
        except FileNotFoundError:
            print(f"[SKIP] File not found: {path}")
            continue

        for model in models:
            try:
                fit = all_fits[dataset_key][model]['posterior']
                W1_samples = fit.stan_variable("W_1")           # (S, P, H)
                W2_samples = fit.stan_variable("W_2") if layers == 2 else None
                WL_samples = fit.stan_variable("W_L")           # (S, H, O)
                b_samples = fit.stan_variable("hidden_bias")    # (S, L, H)
                b1_samples = b_samples[:, 0, :]
                b2_samples = b_samples[:, 1, :] if layers == 2 else None
                bL_samples = fit.stan_variable("output_bias")   # (S, O)
            except KeyError:
                print(f"[SKIP] Model or posterior not found: {dataset_key} -> {model}")
                continue

            S = W1_samples.shape[0]
            y_hats = np.zeros((S, y_test.shape[0]))

            for i in range(S):
                W1 = W1_samples[i]
                W2 = W2_samples[i] if layers == 2 else None
                WL = WL_samples[i]

                # Apply pruning masks if requested. Both masks are computed from
                # the unpruned weights before either one is applied.
                if do_prune:
                    mask_W1 = prune_fn([W1, WL], sparsity)
                    if layers == 2:
                        mask_W2 = prune_fn([W1, W2], sparsity, index_to_prune=1)
                        W2 = W2 * mask_W2[1]
                    W1 = W1 * mask_W1[0]

                if layers == 1:
                    y_hat = forward_pass(X_test, W1, b1_samples[i], WL, bL_samples[i])
                else:
                    y_hat = forward_pass(X_test, W1, b1_samples[i], W2, b2_samples[i], WL, bL_samples[i])
                y_hats[i] = np.asarray(y_hat).reshape(-1)

            # RMSE of every draw at once, then RMSE of the draw-averaged prediction.
            rmses = np.sqrt(np.mean((y_hats - y_test[None, :]) ** 2, axis=1))
            posterior_mean = np.mean(y_hats, axis=0)
            posterior_mean_rmse = np.sqrt(np.mean((posterior_mean - y_test) ** 2))

            posterior_means.append({
                'seed': seed,
                'N': N,
                'sigma': sigma,
                'model': model,
                'sparsity': sparsity,
                'posterior_mean_rmse': posterior_mean_rmse
            })

            results.extend(
                {
                    'seed': seed,
                    'N': N,
                    'sigma': sigma,
                    'model': model,
                    'sparsity': sparsity,
                    'rmse': draw_rmse,
                }
                for draw_rmse in rmses
            )

    df_rmse = pd.DataFrame(results)
    df_posterior_rmse = pd.DataFrame(posterior_means)

    return df_rmse, df_posterior_rmse

# Fractions of weights to prune in the sparsity sweep.
# (For a quick run, a short list such as [0.0, 0.1, 0.95] can be used instead.)
sparsity_levels = [
    0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
    0.95, 0.97, 0.99,
]

# Dataset seeds included in the sweep.
seeds = list(range(1, 6))

def get_N_sigma():
    """Return the fixed (N, sigma) pair used to build dataset keys: (100, 5.0)."""
    return 100, 5.00

In [88]:
# Results of the sparsity sweep, keyed by sparsity level:
#   df_rmse_sparse_*            -> per-draw RMSE frame
#   df_posterior_rmse_sparse_*  -> posterior-mean RMSE frame
df_rmse_sparse_tanh, df_posterior_rmse_sparse_tanh = {}, {}
df_rmse_sparse_tanh_L2, df_posterior_rmse_sparse_tanh_L2 = {}, {}
df_rmse_sparse_tanh_correlated, df_posterior_rmse_sparse_tanh_correlated = {}, {}
df_rmse_sparse_tanh_correlated_L2, df_posterior_rmse_sparse_tanh_correlated_L2 = {}, {}

for sparsity in sparsity_levels:
    # Uncorrelated data, one hidden layer
    df_rmse_sparse_tanh[sparsity], df_posterior_rmse_sparse_tanh[sparsity] = compute_sparse_rmse_results(
        seeds, model_names_tanh, tanh_fits, get_N_sigma, forward_pass_tanh, folder = "friedman_std_5",
        sparsity=sparsity, prune_fn=local_prune_weights
    )

    # Uncorrelated data, two hidden layers
    df_rmse_sparse_tanh_L2[sparsity], df_posterior_rmse_sparse_tanh_L2[sparsity] = compute_sparse_rmse_results(
        seeds, model_names_tanh_L2, tanh_fits_L2, get_N_sigma, forward_pass_tanh_L2, folder = "friedman_std_5", layers=2,
        sparsity=sparsity, prune_fn=local_prune_weights
    )

    # Correlated data, one hidden layer.
    # The correlated fits are stored in `tanh_fits_corr` / `tanh_fits_L2_corr`;
    # the previously used names `tanh_fits_correlated*` are never defined.
    df_rmse_sparse_tanh_correlated[sparsity], df_posterior_rmse_sparse_tanh_correlated[sparsity] = compute_sparse_rmse_results(
        seeds, model_names_tanh, tanh_fits_corr, get_N_sigma, forward_pass_tanh, folder = "friedman_std_5_correlated",
        sparsity=sparsity, prune_fn=local_prune_weights
    )

    # Correlated data, two hidden layers
    df_rmse_sparse_tanh_correlated_L2[sparsity], df_posterior_rmse_sparse_tanh_correlated_L2[sparsity] = compute_sparse_rmse_results(
        seeds, model_names_tanh_L2, tanh_fits_L2_corr, get_N_sigma, forward_pass_tanh_L2, folder = "friedman_std_5_correlated", layers=2,
        sparsity=sparsity, prune_fn=local_prune_weights
    )

In [89]:
import pandas as pd

def _stack_by_sparsity(frames_by_sparsity):
    """Concatenate per-sparsity frames, setting each frame's `sparsity` column to its dict key."""
    tagged = []
    for level, frame in frames_by_sparsity.items():
        tagged.append(frame.assign(sparsity=level))
    return pd.concat(tagged, ignore_index=True)


# One long per-draw RMSE frame per (data setting, depth) combination.
df_rmse_full_tanh = _stack_by_sparsity(df_rmse_sparse_tanh)
df_rmse_full_tanh_L2 = _stack_by_sparsity(df_rmse_sparse_tanh_L2)
df_rmse_full_tanh_correlated = _stack_by_sparsity(df_rmse_sparse_tanh_correlated)
df_rmse_full_tanh_correlated_L2 = _stack_by_sparsity(df_rmse_sparse_tanh_correlated_L2)


In [90]:
def _without_model_suffix(frame, suffix):
    """Return a copy of `frame` whose `model` values have the literal `suffix` removed."""
    renamed = frame.copy()
    renamed["model"] = renamed["model"].str.replace(suffix, "", regex=False)
    return renamed


# o = uncorrelated data, c = correlated data; "_L2" marks the two-hidden-layer runs.
df_tanh_o = _without_model_suffix(df_rmse_full_tanh, " tanh")
df_tanh_o_L2 = _without_model_suffix(df_rmse_full_tanh_L2, " tanh L2")
df_tanh_c = _without_model_suffix(df_rmse_full_tanh_correlated, " tanh")
df_tanh_c_L2 = _without_model_suffix(df_rmse_full_tanh_correlated_L2, " tanh L2")


In [91]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def add_meta(df, setting, depth):
    """
    Return a copy of `df` with three constant columns added.

    `setting` and `depth` are stored as given, and `group` is the combined
    panel label "<setting> | <depth>". The input frame is not modified.
    """
    return df.assign(
        setting=setting,
        depth=depth,
        group=f"{setting} | {depth}",
    )

# (frame, setting label, depth label) for every panel, in plotting order.
_panel_inputs = (
    (df_tanh_c, "Corr", "1"),
    (df_tanh_c_L2, "Corr", "2"),
    (df_tanh_o, "Uncorr", "1"),
    (df_tanh_o_L2, "Uncorr", "2"),
)

# NOTE: this rebinds `df_all`, which an earlier cell used for the unpruned RMSE frames.
df_all = pd.concat(
    [add_meta(frame, setting, depth) for frame, setting, depth in _panel_inputs],
    ignore_index=True,
)

# Make sure the plotted columns are numeric.
for _numeric_col in ("sparsity", "rmse"):
    df_all[_numeric_col] = pd.to_numeric(df_all[_numeric_col])


In [None]:
def plot_seed_curve(df_all, seed=1, sigma=None, N=None):
    """
    Plot RMSE against sparsity for a single seed, one panel per `group`.

    Rows are restricted to `seed` (and to `sigma` / `N` when given), then the
    `rmse` values sharing the same (group, model, sparsity) are averaged and
    drawn as one line per model.
    """
    subset = df_all[df_all["seed"] == seed].copy()
    if sigma is not None:
        subset = subset[subset["sigma"] == sigma]
    if N is not None:
        subset = subset[subset["N"] == N]

    # One averaged value per (group, model, sparsity), ordered along the x axis.
    curve = subset.groupby(["group", "model", "sparsity"], as_index=False)["rmse"].mean()
    curve = curve.sort_values("sparsity")

    panel_names = curve["group"].unique()
    # squeeze=False always yields a 2-D array, so a single panel needs no special case.
    fig, axes = plt.subplots(1, len(panel_names), figsize=(14, 4), sharey=False, squeeze=False)
    axes = axes[0]

    # Text inside the parentheses of every panel title.
    qualifiers = [f"seed={seed}"]
    if N is not None:
        qualifiers.append(f"N={N}")
    if sigma is not None:
        qualifiers.append(f"sigma={sigma}")
    title_suffix = ", ".join(qualifiers)

    for ax, panel in zip(axes, panel_names):
        panel_rows = curve[curve["group"] == panel]
        for model, rows in panel_rows.groupby("model"):
            ax.plot(rows["sparsity"], rows["rmse"], marker="o", linewidth=2, label=model)

        ax.set_title(f"{panel} ({title_suffix})", fontsize=12)
        ax.set_xlabel("Sparsity")
        ax.grid(True, alpha=0.3)

    axes[0].set_ylabel("RMSE")
    axes[-1].legend(loc="best", frameon=False)
    plt.tight_layout()
    plt.show()

# Example: seed 1 of the N=100, sigma=5 datasets
plot_seed_curve(df_all, seed=1, sigma=5.0, N=100)


In [None]:
def plot_mean_curve(df_all, sigma=None, N=None, band="std"):
    """
    Plot mean RMSE against sparsity, one panel per `group`.

    Rows are optionally restricted to `sigma` / `N`; the mean and standard
    deviation of `rmse` are then taken over all remaining rows that share the
    same (group, model, sparsity). `band="std"` shades mean +/- std,
    `band="errorbar"` draws error bars instead, and any other value draws the
    mean line only.
    """
    subset = df_all.copy()
    if sigma is not None:
        subset = subset[subset["sigma"] == sigma]
    if N is not None:
        subset = subset[subset["N"] == N]

    # Mean and spread per (group, model, sparsity), ordered along the x axis.
    stats = subset.groupby(["group", "model", "sparsity"], as_index=False)["rmse"].agg(mean="mean", std="std")
    stats = stats.sort_values("sparsity")

    panel_names = stats["group"].unique()
    # squeeze=False always yields a 2-D array, so a single panel needs no special case.
    fig, axes = plt.subplots(1, len(panel_names), figsize=(14, 4), sharey=False, squeeze=False)
    axes = axes[0]

    # Text inside the parentheses of every panel title.
    qualifiers = ["mean over seeds"]
    if N is not None:
        qualifiers.append(f"N={N}")
    if sigma is not None:
        qualifiers.append(f"sigma={sigma}")
    title_suffix = ", ".join(qualifiers)

    for ax, panel in zip(axes, panel_names):
        panel_rows = stats[stats["group"] == panel]
        for model, rows in panel_rows.groupby("model"):
            ax.plot(rows["sparsity"], rows["mean"], linewidth=2, label=model)
            if band == "std":
                lower = rows["mean"] - rows["std"]
                upper = rows["mean"] + rows["std"]
                ax.fill_between(rows["sparsity"], lower, upper, alpha=0.15)
            elif band == "errorbar":
                ax.errorbar(rows["sparsity"], rows["mean"], yerr=rows["std"], fmt="none", capsize=2)

        ax.set_title(f"{panel} ({title_suffix})", fontsize=12)
        ax.set_xlabel("Sparsity")
        ax.grid(True, alpha=0.3)

    axes[0].set_ylabel("RMSE (mean over seeds)")
    axes[-1].legend(loc="best", frameon=False)
    plt.tight_layout()
    plt.show()

# Example: mean curves only (an empty `band` disables both spread styles)
plot_mean_curve(df_all, sigma=5.0, N=100, band="")


#### ORIGINAL DATASET - MODIFY RUN_REGRESSION.py
python3 utils/run_all_regression_models.py --model beta_tau_tanh --output_dir results/regression/single_layer/relu/friedman/no_lambda &&
python3 utils/run_all_regression_models.py --model dirichlet_tau_tanh --output_dir results/regression/single_layer/relu/friedman/no_lambda &&

python3 utils/run_all_regression_models.py --model beta --output_dir results/regression/single_layer/relu/friedman/no_lambda &&
python3 utils/run_all_regression_models.py --model dirichlet --output_dir results/regression/single_layer/relu/friedman/no_lambda &&

python3 utils/run_all_regression_models.py --model beta_tanh --output_dir results/regression/single_layer/relu/friedman/no_lambda &&
python3 utils/run_all_regression_models.py --model dirichlet_tanh --output_dir results/regression/single_layer/relu/friedman/no_lambda

In [None]:
import numpy as np
from scipy.stats import dirichlet, beta

np.random.seed(42)

K = 10        # number of components
alpha = 0.1   # common concentration parameter

# One draw from the symmetric Dirichlet(alpha, ..., alpha) in K dimensions
dirichlet_draw = dirichlet.rvs(alpha=np.full(K, alpha), size=1)[0]

# K independent Beta(alpha, (K - 1) * alpha) draws (same marginals as the Dirichlet)
beta_draw = beta.rvs(alpha, (K - 1) * alpha, size=K)

print("Dirichlet draw:", dirichlet_draw, sep="\n")
print("Sum:", dirichlet_draw.sum())

print("\nIndependent Beta draws:", beta_draw, sep="\n")
print("Sum:", beta_draw.sum())


In [None]:
N = 100_000  # number of Monte Carlo repetitions

# A Dirichlet vector sums to 1 by construction, so its sums are all ones.
dirichlet_sums = np.ones(N)
# Row sums of K independent Beta(alpha, (K - 1) * alpha) draws.
beta_sums = np.sum(beta.rvs(alpha, (K - 1) * alpha, size=(N, K)), axis=1)

for _stat_label, _stat_value in (("mean =", beta_sums.mean()), ("std  =", beta_sums.std())):
    print("Beta sum: " + _stat_label, _stat_value)
print("Beta sum: min/max =", beta_sums.min(), beta_sums.max())


In [None]:
# N draws of each kind, one row per draw and one column per component.
dirichlet_samples = dirichlet.rvs(alpha=np.full(K, alpha), size=N)
beta_samples = beta.rvs(alpha, (K - 1) * alpha, size=(N, K))

# Sample correlation between the first two components (columns are the variables).
_dirichlet_corr_01 = np.corrcoef(dirichlet_samples, rowvar=False)[0, 1]
_beta_corr_01 = np.corrcoef(beta_samples, rowvar=False)[0, 1]

print("Dirichlet off-diagonal correlation (approx):", _dirichlet_corr_01)

print("Beta off-diagonal correlation (approx):", _beta_corr_01)


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import dirichlet, beta

# --- Settings ---
np.random.seed(42)
K = 10
alpha = 0.1
N = 80_000  # Monte Carlo draws

# --- 1) Draw variance vectors ---
v_dir = dirichlet.rvs(alpha=np.full(K, alpha), size=N)            # sums to 1
v_beta = beta.rvs(alpha, (K - 1) * alpha, size=(N, K))            # iid, same marginals

# --- 2) Draw weights with those variances: w_i ~ N(0, v_i) ---
# np.random.normal takes the standard deviation as `scale`, so the variance
# vectors must be square-rooted; passing v directly would draw N(0, v_i^2).
w1 = np.random.normal(loc=0.0, scale=np.sqrt(v_dir), size=(N, K))   # Dirichlet variances
w2 = np.random.normal(loc=0.0, scale=np.sqrt(v_beta), size=(N, K))  # Beta variances

# --- 3) Diagnostics for plotting ---
# total variance per draw
sum_dir = v_dir.sum(axis=1)      # == 1
sum_beta = v_beta.sum(axis=1)

# largest single variance component per draw
max_dir = v_dir.max(axis=1)
max_beta = v_beta.max(axis=1)

# Euclidean norm of each weight vector
l2_w1 = np.linalg.norm(w1, axis=1)
l2_w2 = np.linalg.norm(w2, axis=1)

# largest absolute weight per draw
maxabs_w1 = np.max(np.abs(w1), axis=1)
maxabs_w2 = np.max(np.abs(w2), axis=1)

# Correlation matrices (neutral scaling: always plot on [-1, 1])
corr_dir = np.corrcoef(v_dir.T)
corr_beta = np.corrcoef(v_beta.T)

def _finalize_figure(title, xlabel=None, ylabel=None, with_legend=True):
    """Label the current pyplot figure, optionally add a legend, and show it."""
    if xlabel is not None:
        plt.xlabel(xlabel)
    if ylabel is not None:
        plt.ylabel(ylabel)
    plt.title(title)
    if with_legend:
        plt.legend()
    plt.show()


def _overlaid_histograms(first, second, first_label, second_label, upper):
    """Start a new figure with two overlaid density histograms on [0, upper]."""
    plt.figure()
    plt.hist(first, bins=120, density=True, alpha=0.6, label=first_label)
    plt.hist(second, bins=120, density=True, alpha=0.6, label=second_label)
    plt.xlim(0, upper)


# ---- PLOT A: sum of variances (Dirichlet fixed at 1 vs Beta random) ----
plt.figure()
plt.hist(sum_beta, bins=80, density=True, alpha=0.8,
         label='Sum of iid Beta variances')
plt.axvline(1.0, linewidth=2, label='Dirichlet sum (always 1)')
plt.xlim(0, K)
_finalize_figure(
    'Sum constraint: Dirichlet vs iid Betas (same marginals)',
    xlabel='Sum of 10 variance components',
    ylabel='Density',
)

# ---- PLOT B: joint behavior (v1 vs v2) on fixed [0, 1] axes ----
# a random subset of the draws is plotted
m = 6000
idx = np.random.choice(N, size=m, replace=False)

plt.figure()
plt.scatter(v_dir[idx, 0], v_dir[idx, 1], s=10, alpha=0.3,
            label='Dirichlet (v1 vs v2)')
plt.scatter(v_beta[idx, 0], v_beta[idx, 1], s=10, alpha=0.3,
            label='iid Betas (v1 vs v2)')
plt.xlim(0, 1)
plt.ylim(0, 1)
_finalize_figure('Two components (fixed [0,1] axes)', xlabel='v1', ylabel='v2')

# ---- PLOTS C and D: correlation heatmaps on the same fixed [-1, 1] scale ----
for _corr_matrix, _family in ((corr_dir, 'Dirichlet'), (corr_beta, 'iid Betas')):
    plt.figure()
    plt.imshow(_corr_matrix, vmin=-1, vmax=1, interpolation='nearest')
    plt.colorbar(label='Correlation')
    plt.xticks(range(K))
    plt.yticks(range(K))
    _finalize_figure(
        f'Corr(variance components): {_family} (fixed scale [-1,1])',
        with_legend=False,
    )

# ---- PLOT E: induced weight scale via ||w||_2 ----
# the x axis is cut at the 99.5% quantile of the pooled values
xmax = float(np.quantile(np.concatenate([l2_w1, l2_w2]), 0.995))
_overlaid_histograms(
    l2_w1,
    l2_w2,
    r'$\|w_1\|_2$,  $v\sim$ Dirichlet,  $w|v\sim N(0,\mathrm{diag}(v))$',
    r'$\|w_2\|_2$,  $v_i\sim$ iid Beta,  $w|v\sim N(0,\mathrm{diag}(v))$',
    xmax,
)
_finalize_figure(
    'Weights induced by variance vectors: Dirichlet vs iid Betas',
    xlabel=r'$\|w\|_2$',
    ylabel='Density',
)

# ---- (Optional) PLOT F: induced max |w_i| ----
xmax2 = float(np.quantile(np.concatenate([maxabs_w1, maxabs_w2]), 0.995))
_overlaid_histograms(
    maxabs_w1,
    maxabs_w2,
    r'$\max_i |w_{1,i}|$',
    r'$\max_i |w_{2,i}|$',
    xmax2,
)
_finalize_figure(
    'Induced extreme weights: Dirichlet vs iid Betas',
    xlabel=r'$\max_i |w_i|$',
    ylabel='Density',
)
