In [1]:
import numpy as np
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

def generate_two_moons(N=200, sigma=0.5, test_size=0.2, D=2, seed=42):
    """
    Generate a two-moons dataset, optionally padded with noise features, and split it.

    Parameters:
    N (int): Number of samples to generate.
    sigma (float): Standard deviation of the Gaussian noise added to the moons.
    test_size (float): Proportion of the dataset to include in the test split.
    D (int): Total number of features (must be >= 2). The first two columns are
        the moon coordinates; the remaining ``D - 2`` columns are independent
        standard-normal noise.
    seed (int): Random seed, reused for the moons, the extra features and the split.

    Returns:
    X_train (ndarray): Training features.
    X_test (ndarray): Test features.
    y_train (ndarray): Training labels, coded as 1 and 2 (not 0 and 1).
    y_test (ndarray): Test labels, coded as 1 and 2.

    Raises:
    ValueError: If ``D`` is smaller than 2.
    """
    if D < 2:
        raise ValueError("D (number of features) must be at least 2.")

    X, y = make_moons(n_samples=N, noise=sigma, random_state=seed)

    # Relabel the classes from {0, 1} to {1, 2}.
    y = np.where(y == 0, 1, 2)

    if D > 2:
        rng = np.random.default_rng(seed=seed)
        # Size the padding from X itself so it always matches the generated rows.
        extra_features = rng.normal(0, 1, size=(X.shape[0], D - 2))
        X = np.hstack((X, extra_features))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=seed)
    return X_train, X_test, y_train, y_test

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from mpmath import hyper

# Module-level constants; `alpha` is read as a global by p_kappa_dirichlet_horseshoe.
p, alpha = 10, 1.0
# b is derived from p and alpha; it is not referenced again in this cell.
b = (p - 1) * alpha

# The function p(kappa | sigma, tau), Dirichlet-horseshoe variant
def p_kappa_dirichlet_horseshoe(kappa, a_j=1.0, p=10):
    """Evaluate the Dirichlet-horseshoe expression for p(kappa | sigma, tau) at one point.

    Returns 0.0 when kappa lies outside the open interval (0, 1). Otherwise the
    result is
        (1/pi) * a_j / ((1 - kappa) * sqrt(kappa) * sqrt(1 - kappa)) * (1/p)
    multiplied by a 3F2 hypergeometric function (mpmath ``hyper``) evaluated at
    z = -kappa / (1 - kappa), converted to a Python float.

    Note: ``alpha`` is not a parameter; it is read from module scope at call
    time, so the result depends on whichever cell last assigned ``alpha``.
    """
    if kappa >= 1 or kappa <= 0:
        return 0.0

    one_minus_kappa = 1 - kappa
    denom = one_minus_kappa * np.sqrt(kappa) * np.sqrt(one_minus_kappa)
    scale = (1 / np.pi) * (a_j / denom) * (1 / p)

    # 3F2(upper; lower; z) with the parameters built from p and the global alpha.
    upper = [p*alpha, (alpha+1)/2, alpha/2 + 1]
    lower = [(p*alpha+1)/2, p*alpha/2 + 1]
    z = -kappa / one_minus_kappa
    series = hyper(upper, lower, z)
    return float(scale * series)

# The function p(kappa | sigma, tau), horseshoe variant
def p_kappa_horseshoe(kappa, a_j=1.0):
    """Evaluate the horseshoe expression for p(kappa | sigma, tau) at one point.

    Returns 0.0 when kappa lies outside the open interval (0, 1); otherwise
        (1/pi) * a_j / ((a_j^2 - 1) * kappa + 1) / (sqrt(kappa) * sqrt(1 - kappa))
    as a Python float.
    """
    if kappa >= 1 or kappa <= 0:
        return 0.0

    shrink = a_j / ((a_j**2 - 1) * kappa + 1)
    root_term = np.sqrt(kappa) * np.sqrt(1 - kappa)
    return float((1 / np.pi) * shrink * 1 / root_term)

# Build the kappa grid (strictly inside (0, 1)) and evaluate both densities on it.
kappa_vals = np.linspace(0.001, 0.999, num=500)
p_vals_dirichlet_horseshoe = list(map(p_kappa_dirichlet_horseshoe, kappa_vals))
p_vals_horseshoe = list(map(p_kappa_horseshoe, kappa_vals))


In [None]:
# Draw both kappa densities on the same axes for comparison.
plt.plot(kappa_vals, p_vals_dirichlet_horseshoe, label="Dirichlet Horseshoe")
plt.plot(kappa_vals, p_vals_horseshoe, label="Horseshoe")
plt.title("Density")
plt.ylabel(r"$p(\kappa \mid \sigma, \tau)$")
plt.xlabel(r"$\kappa$")
plt.legend()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Monte Carlo estimate of a kappa density on a grid: draw xi ~ Beta once, average
# xi / (1 + c(kappa) * xi^2) over the draws for every grid point, multiply by a
# kappa-only prefactor, then sample kappa from the discretized result and plot it.
# NOTE(review): this cell rebinds the globals `p` and `alpha` (alpha becomes 0.1);
# p_kappa_dirichlet_horseshoe reads `alpha` from module scope, so re-running it
# after this cell gives different values.

# --- Parameters ---
a = 1.0          # plays the role of a_j in the density formulas
p = 10
alpha = 0.1
n_mc = 20000     # MC samples for xi
n_draw = 20000   # how many kappa samples to draw for the histogram
grid_n = 600     # kappa grid resolution
eps = 1e-4       # keep away from 0 and 1 to avoid singularities
rng = np.random.default_rng(123)

# --- Draw xi once and reuse across all kappa (variance reduction) ---
xi = rng.beta(alpha, (p - 1) * alpha, size=n_mc)

# --- Build kappa grid and compute unnormalized density on the grid ---
kappa_grid = np.linspace(eps, 1 - eps, grid_n)

# Prefactor depends only on kappa
prefactor = (1 / np.pi) * (a / ((1 - kappa_grid) * np.sqrt(kappa_grid) * np.sqrt(1 - kappa_grid)))

# c(kappa) = kappa * a^2 / (1 - kappa)
c_grid = (kappa_grid * a * a) / (1 - kappa_grid)

# Monte Carlo expectation for each kappa in a vectorized way
# E[ xi / (1 + c xi^2) ] ~= mean over xi; do it for all c in c_grid
# Shape tricks: (grid_n, 1) vs (1, n_mc) to broadcast
num = xi[None, :]                # shape (1, n_mc)
den = 1.0 + (c_grid[:, None] * xi[None, :]**2)  # shape (grid_n, n_mc)
exp_term = (num / den).mean(axis=1)  # shape (grid_n,)

dens_unnorm = prefactor * exp_term
dens_unnorm = np.clip(dens_unnorm, 0, np.inf)   # numerical safety

# --- Turn grid density into a discrete sampling distribution ---
probs = dens_unnorm / dens_unnorm.sum()

# --- Sample kappa from the discretized density and plot histogram ---
# Samples are grid points, so the histogram shows the discretized distribution.
kappa_samples = rng.choice(kappa_grid, size=n_draw, p=probs)

plt.figure(figsize=(7, 4))
plt.hist(kappa_samples, bins=100, density=True, alpha=0.5, edgecolor='none', label="samples (hist)")
# Overlay MC density (rescale by grid spacing so the curve is comparable to a density)
dx = kappa_grid[1] - kappa_grid[0]
plt.plot(kappa_grid, dens_unnorm / (dens_unnorm.sum() * dx), lw=2, label="MC density (overlay)")
plt.xlabel(r"$\kappa$")
plt.ylabel("density")
plt.ylim(0, 10)  # clips the y-axis at 10; larger density values are cut off in the plot
plt.title(r"MC estimate of $p(\kappa\,|\,\sigma,\tau)$ with $c(\kappa)=\kappa a^2/(1-\kappa)$")
plt.legend()
plt.tight_layout()
plt.show()


In [42]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import gamma, factorial
from mpmath import hyper

# Module-level constants; `alpha` and `nu` are read as globals by p_kappa_dirichlet_stud_t.
p, alpha, nu = 10, 1.0, 1
# b is derived from p and alpha; it is not referenced again in this cell.
b = (p - 1) * alpha

# The function p(kappa | sigma, tau), Dirichlet Student-t variant
def p_kappa_dirichlet_stud_t(kappa, one_or_two=1, a_j=1.0, p=10):
    """Evaluate one of two closed-form expressions for p(kappa | sigma, tau).

    Parameters:
    kappa (float): Point at which to evaluate; values outside (0, 1) give 0.0.
    one_or_two (int): Selects the expression. ``1`` and ``2`` use different
        prefactors and different 3F2 hypergeometric parameters; both evaluate
        the series at z = -kappa * nu / (1 - kappa).
    a_j (float): Scale entering the prefactor as 1 / a_j.
    p (int): Enters the hypergeometric parameters (and, for ``one_or_two == 2``,
        the gamma-function ratio) through ``p * alpha``.

    Returns:
    float: The selected expression at ``kappa``, or 0.0 outside (0, 1).

    Raises:
    ValueError: If ``kappa`` is inside (0, 1) and ``one_or_two`` is not 1 or 2.

    Note: ``alpha`` and ``nu`` are read from module scope at call time.
    """
    if kappa <= 0 or kappa >= 1:
        return 0.0
    if one_or_two not in (1, 2):
        raise ValueError(f"one_or_two must be 1 or 2, got {one_or_two!r}")

    # Import under a private alias: other cells in this notebook rebind the
    # global name ``gamma`` to ``scipy.stats.gamma`` (a distribution object),
    # which would break the gamma-function calls below.
    from scipy.special import gamma as gamma_fn

    # Factor shared by both expressions.
    common = (1/a_j)*(1/(np.sqrt(np.pi*nu))) * (gamma_fn((nu+1)/2))/(gamma_fn(nu/2)) * kappa**(nu/2 - 1)
    z = (-kappa*nu) / (1-kappa)

    if one_or_two == 1:
        prefactor = common * np.sqrt(1-kappa)
        hyper_val = hyper([1, (alpha+1)/2, alpha/2 + 1], [(p*alpha+1)/2, p*alpha/2 + 1], z)
    else:
        prefactor = common * (1/(1-kappa)**(3/2)) * gamma_fn(alpha + nu)/(gamma_fn(alpha)) * gamma_fn(p*alpha)/(gamma_fn(p*alpha+nu))
        hyper_val = hyper([(nu+1)/2, (alpha+nu)/2, (alpha+nu+1)/2], [(p*alpha+nu)/2, (p*alpha+nu+1)/2], z)
    return float(prefactor * hyper_val)


# Build the kappa grid and evaluate the second (one_or_two=2) expression on it.
kappa_vals = np.linspace(0.001, 0.999, num=500)
p_vals_dirichlet_stud_t = [
    p_kappa_dirichlet_stud_t(kappa_i, 2) for kappa_i in kappa_vals
]


In [None]:
# Plot the Dirichlet Student-t kappa density; the horseshoe overlay is disabled.
plt.plot(kappa_vals, p_vals_dirichlet_stud_t, label="Dirichlet Student T")
#plt.plot(kappa_vals, p_vals_horseshoe, label="Horseshoe")
plt.title("Density")
plt.ylabel(r"$p(\kappa \mid \sigma, \tau)$")
plt.xlabel(r"$\kappa$")
plt.legend()
plt.show()

In [None]:
# Display Gamma(alpha+nu)/Gamma(alpha) * Gamma(p*alpha)/Gamma(p*alpha+nu); the same
# ratio is used inside p_kappa_dirichlet_stud_t for the one_or_two == 2 expression.
# NOTE(review): requires the global `gamma` to be scipy.special.gamma; other cells
# import scipy.stats.gamma under the same name, so this depends on execution order.
gamma(alpha + nu)/(gamma(alpha))*gamma(p*alpha)/(gamma(p*alpha + nu))

In [14]:
# Generate a 5-feature two-moons sample (2 moon coordinates + 3 noise columns) and split it.
X_train, X_test, y_train, y_test = generate_two_moons(
    N=200,
    sigma=0.5,
    test_size=0.2,
    D=5,
    seed=42,
)

In [None]:
# Display the training labels currently bound to y_train.
y_train

In [1]:
# Relative paths to stored two-moons .npz archives.
# Presumably the file names encode N, p, sigma and seed of each dataset — TODO confirm.
path_1 = "datasets/moons/many/Moon_N100_p2_sigma0.20_seed3.npz"

path_2 = "datasets/moons/many/Moon_N200_p2_sigma0.20_seed7.npz"

path_3 = "datasets/moons/many/Moon_N100_p2_sigma0.20_seed11.npz"

In [None]:
# Open the archive once and close it when done; the original opened the file
# four times (once per array) and never closed any of the handles.
with np.load(path_3) as moon_data:
    X_train, X_test = moon_data["X_train"], moon_data["X_test"]
    y_train, y_test = moon_data["y_train"], moon_data["y_test"]


In [None]:
import matplotlib.pyplot as plt

#X_train, X_test, y_train, y_test = generate_two_moons(N=200, sigma=0.5, test_size=0.2, D=2, seed=42)

# Scatter the first two feature columns of whatever X_train is in scope, coloured by label.
# NOTE(review): the title hard-codes "generate_two_moons (sigma=0.5)", but the call
# above is commented out and another cell loads X_train from a "sigma0.20" file —
# confirm the title matches the data actually plotted.
plt.figure(figsize=(6, 5))
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap="bwr", edgecolor="k")
plt.ylabel("$x_2$")
plt.xlabel("$x_1$")
plt.title("Training Data from generate_two_moons (sigma=0.5)")
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import laplace, t, gamma

# Parameters
b = 1.0
n_samples = 10000000
x = np.linspace(-10, 10, 1000)
# Seeded generator so the Monte Carlo curve is reproducible across runs.
mixture_rng = np.random.default_rng(0)

# True Laplace PDF
laplace_pdf = laplace.pdf(x, loc=0, scale=b)

# Sample from the t2-Gamma mixture: w = sqrt(xi) * t with
# xi ~ Gamma(shape=1, scale=2*b^2) and t ~ Student-t(df=2).
xi_samples = gamma.rvs(a=1, scale=2 * b**2, size=n_samples, random_state=mixture_rng)
t_samples = t.rvs(df=2, size=n_samples, random_state=mixture_rng)
w_samples = np.sqrt(xi_samples) * t_samples

# Histogram restricted to [-10, 10], normalised by the TOTAL number of draws.
# np.histogram(..., range=..., density=True) ignores draws outside the range and
# renormalises the rest to integrate to 1, which inflates the curve whenever the
# mixture has mass beyond +/-10 and makes it incomparable to the Laplace PDF.
counts, bin_edges = np.histogram(w_samples, bins=200, range=(-10, 10))
hist_vals = counts / (n_samples * np.diff(bin_edges))
bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

# Plot
plt.figure(figsize=(10, 6))
plt.plot(x, laplace_pdf, label='Laplace(0, b)', linewidth=2)
plt.plot(bin_centers, hist_vals, label='t2-Gamma mixture (hist)', linestyle='--', linewidth=2)
plt.title("Laplace vs t2-Gamma Scale Mixture")
plt.xlabel("w")
plt.ylabel("Density")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import erf
from scipy.stats import laplace

# Parameters: Laplace scale b and the constant a = 4 * b^2 passed to symbolic_integral.
b = 1.0
a = 4 * b ** 2

# Closed-form expression obtained from the symbolic integral
def symbolic_integral(w, a):
    """Evaluate the closed-form integral expression at ``w`` for constant ``a``.

    The original form is

        [ sqrt(pi) * a * (erf(x) - 1) * (2 w^2 - a) * exp(x^2) + 2 a^(3/2) |w| ] / (2 a^(5/2))

    with x = |w| / sqrt(a). Using erfcx(x) = exp(x^2) * (1 - erf(x)) this equals

        [ sqrt(pi) * a * (a - 2 w^2) * erfcx(x) + 2 a^(3/2) |w| ] / (2 a^(5/2)),

    which is what is computed here. The rewrite avoids ``exp(w^2 / a)``
    overflowing to inf (and the result becoming nan via ``0 * inf``) for large
    |w|, and avoids the cancellation in ``erf(x) - 1``.

    Parameters:
    w (float or ndarray): Evaluation point(s); only |w| enters the result.
    a (float): Positive constant (the notebook uses a = 4 * b^2).

    Returns:
    float or ndarray: Value of the expression, elementwise for array input.
    """
    # Local import: the notebook's import cell only brings in `erf`.
    from scipy.special import erfcx

    abs_w = np.abs(w)
    sqrt_pi = np.sqrt(np.pi)
    scaled_erfc = erfcx(abs_w / np.sqrt(a))

    numerator = sqrt_pi * a * (a - 2 * abs_w**2) * scaled_erfc + 2 * a**1.5 * abs_w
    denominator = 2 * a**(2.5)

    return numerator / denominator

# Positive w grid for the comparison, starting just above zero.
# NOTE(review): the earlier comment said w = 0 is skipped to avoid a division by
# zero, but symbolic_integral contains no division by w — confirm whether 0.01 matters.
w_vals = np.linspace(0.01, 5, num=500)

# Evaluate the closed-form expression pointwise and the Laplace density on the same grid.
integral_vals = np.array([symbolic_integral(w_i, a) for w_i in w_vals])
laplace_vals = laplace.pdf(w_vals, loc=0, scale=b)

# Overlay both curves.
plt.figure(figsize=(10, 6))
plt.plot(w_vals, integral_vals, label="Symbolic integral expression", linestyle="--")
plt.plot(w_vals, laplace_vals, label="Laplace(0, b)", linewidth=2)
plt.title("Comparison: Symbolic Integral vs Laplace Density")
plt.ylabel("Density")
plt.xlabel("w")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gamma, t, laplace
from scipy.integrate import quad

# Parameters read as globals by marginal_density.
b, df = 1.36, 4           # Laplace scale; Student-t degrees of freedom
rate = 1 / (2 * b**2)     # Gamma rate; gamma.pdf is called with scale = 1 / rate

# Marginal density of w: Student-t density times Gamma density, integrated over xi
def marginal_density(w):
    """Integrate t.pdf(w; df, scale=sqrt(xi)) * gamma.pdf(xi; a=1, scale=1/rate) over xi in (0, inf).

    ``df`` and ``rate`` are read from module scope. Only the integral estimate
    returned by ``quad`` is kept; its error estimate is discarded.
    """
    def _joint(xi):
        return t.pdf(w, df=df, loc=0, scale=np.sqrt(xi)) * gamma.pdf(xi, a=1, scale=1/rate)

    return quad(_joint, 0, np.inf)[0]

# Evaluate the marginal density pointwise on a symmetric w grid (one quad call per point).
w_vals = np.linspace(-10, 10, num=500)
marginal_vals = np.array(list(map(marginal_density, w_vals)))
# Cell output: the Laplace scale b.
b

In [None]:
# Candidate Laplace scales; only the first is drawn, the other two are disabled below.
b_1, b_2, b_3 = 1.36, 2.0, 3.0
laplace_vals_1 = laplace.pdf(w_vals, loc=0, scale=b_1)
laplace_vals_2 = laplace.pdf(w_vals, loc=0, scale=b_2)
laplace_vals_3 = laplace.pdf(w_vals, loc=0, scale=b_3)

# Overlay the numerically integrated mixture density and the Laplace reference.
plt.figure(figsize=(10, 6))
plt.plot(w_vals, marginal_vals, label="Student-t4 + Gamma mixture", linestyle="--")
plt.plot(w_vals, laplace_vals_1, label="Laplace(0, 1.36)", linewidth=2)
#plt.plot(w_vals, laplace_vals_2, label='Laplace(0, 2)', linewidth=2)
#plt.plot(w_vals, laplace_vals_3, label='Laplace(0, 3)', linewidth=2)
plt.title("Marginal density from Student-t4 + Gamma vs. Laplace")
plt.ylabel("Density")
plt.xlabel("w")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gamma, t, laplace
from scipy.integrate import quad

# Mixture parameters read as globals by marginal_density.
df = 50                     # Student-t degrees of freedom
alpha, beta = 1.0, 1.0      # Gamma shape and rate
rate = beta                 # gamma.pdf is called with scale = 1 / rate

# Laplace scale used for the comparison curve: sqrt(df * alpha / (2 * beta * (df - 2))).
b_match = np.sqrt((df * alpha) / (2 * beta * (df - 2)))

print(f"Matched Laplace scale b = {b_match:.4f}")

# Marginal density of w, obtained by integrating xi out numerically
def marginal_density(w):
    """Integrate t.pdf(w; df, scale=sqrt(xi)) * gamma.pdf(xi; a=alpha, scale=1/rate) over xi in (0, inf).

    ``df``, ``alpha`` and ``rate`` are read from module scope. ``quad`` is run
    with absolute and relative tolerances of 1e-10; only the integral estimate
    is returned. This definition replaces any earlier ``marginal_density`` in
    the kernel.
    """
    def _joint(xi):
        return t.pdf(w, df=df, loc=0, scale=np.sqrt(xi)) * gamma.pdf(xi, a=alpha, scale=1/rate)

    return quad(_joint, 0, np.inf, epsabs=1e-10, epsrel=1e-10)[0]

# Evaluate the mixture density and the matched Laplace density on a common grid.
w_vals = np.linspace(-10, 10, 500)
marginal_vals = np.array([marginal_density(w) for w in w_vals])
laplace_vals = laplace.pdf(w_vals, loc=0, scale=b_match)

# Plotting. The legend and title are built from `df` so they always state the
# degrees of freedom actually used (they previously said "t4" while df = 50).
plt.figure(figsize=(10, 6))
plt.plot(w_vals, marginal_vals, label=f'Student-t{df} + Gamma mixture', linestyle='--')
plt.plot(w_vals, laplace_vals, label=f'Laplace(0, {b_match:.2f})', linewidth=2, color='orange')
plt.title(f"Marginal density from Student-t{df} + Gamma vs. Laplace")
plt.xlabel("w")
plt.ylabel("Density")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
