In [None]:
import numpy as np

# Read the comma-separated arrays exported from the Julia run.
# NOTE(review): a later cell reassigns both `X` and `coord` with synthetic data
# before the model is fitted, so the arrays loaded here look unused downstream;
# confirm which data set the fit is meant to run on.
X = np.loadtxt(fname="../data/X_data.csv", delimiter=",")
print("Loaded X shape:", X.shape)

# Coordinates that accompany X (used below to build the cost matrix).
coord = np.loadtxt(fname="../data/coord_data.csv", delimiter=",")


In [None]:
from sklearn.metrics import pairwise_distances

# Scale of the exponential kernel built below.
eps = 0.025

# Squared-Euclidean cost between every pair of coordinates; reshape(-1, 1)
# presents the coordinates as a single-column 2-D array.
C = pairwise_distances(coord.reshape(-1, 1), metric="sqeuclidean")

# Divide by the mean entry so the cost has mean 1 (same normalization as the notebook).
C = C / C.mean()

# Elementwise kernel K = exp(-C / eps).
K = np.exp(-C / eps)

In [None]:
import numpy as np
import pytest
from sklearn.metrics import pairwise_distances

import sys
from pathlib import Path

# Put "../src" at the front of sys.path so the local wassnmf package can be
# imported without installing it.
# NOTE(review): the path is relative, so it presumably resolves against the
# kernel's working directory - verify the notebook is run from its own folder.
sys.path.insert(0, "../src")
from wassnmf.wassnmf import WassersteinNMF

In [None]:
def f(x, mu, sigma=1.0):
    """Unnormalised bump ``exp(-(x - mu)**2)`` mirroring the Julia helper f(coord, μ, σ).

    The Julia expression being mirrored is ``exp.(-(x .- μ).^2)``. Its exponent
    carries no width factor, so ``sigma`` does not influence the result. An
    earlier version of this function divided the exponent by ``2 * sigma**2``,
    which produced wider bumps than that expression.

    Parameters
    ----------
    x : float or numpy.ndarray
        Point(s) at which the bump is evaluated.
    mu : float
        Centre of the bump; the result equals 1 where ``x == mu``.
    sigma : float, optional
        Kept only so the signature lines up with the Julia helper; unused.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        ``exp(-(x - mu)**2)``, with the shape of ``x - mu``.
    """
    # Deliberately no 1 / (2 * sigma**2) factor: the mirrored formula has none.
    return np.exp(-(x - mu) ** 2)

In [None]:
# Reproducible synthetic data: each column of X is a weighted sum of three bumps
# evaluated on a 1-D grid.
np.random.seed(42)
n_features = 100  # number of grid points (rows of X)
n_samples = 100   # number of generated columns
coord = np.linspace(-12, 12, n_features)
X = np.zeros((n_features, n_samples), dtype=np.float64)

sigma = 1.0  # spread of the random jitter applied to each bump centre
for j in range(n_samples):
    # One bump near +6, one near 0, one near -6. For each bump the amplitude
    # (uniform) is drawn before the centre jitter (normal), and the bumps are
    # accumulated in that order, so the seeded random stream and the summation
    # order match a straight three-term sum.
    for centre in (6, 0, -6):
        amplitude = np.random.rand()
        mu = sigma * np.random.randn() + centre
        X[:, j] += amplitude * f(coord, mu, sigma=1.0)

# Rescale every column so that its entries sum to 1.
X /= X.sum(axis=0, keepdims=True)

print("X shape:", X.shape)


In [None]:
# Scale of the exponential kernel; the same value is handed to the model as `epsilon`.
eps = 0.025

# Pairwise squared-Euclidean cost on the coordinate grid, then divided by its
# mean entry so the cost matrix has mean 1.
C = pairwise_distances(coord.reshape(-1, 1), metric='sqeuclidean')
C = C / C.mean()

# Elementwise kernel K = exp(-C / eps).
K = np.exp(-C / eps)

print("C shape:", C.shape, "  K shape:", K.shape)


In [None]:
# Configure the factorisation with the settings taken from the Julia notebook.
# `epsilon` reuses the `eps` that scaled the kernel K.
wnmf = WassersteinNMF(
    n_components=3,
    n_iter=10,
    epsilon=eps,
    rho1=0.05,
    rho2=0.05,
    verbose=True,
)

# fit_transform receives the data matrix and the kernel and returns two factors;
# the checks in the next cell expect D to be (n_features, 3) and Lambda (3, n_samples).
D, Lambda = wnmf.fit_transform(X, K)

print("D shape:", D.shape)
print("Lambda shape:", Lambda.shape)


In [None]:
# Sanity checks on the two factors, in order: shapes, non-negativity, column sums.

# 1) Shapes: D is features x components, Lambda is components x samples.
assert D.shape == (n_features, 3), f"Expected D shape {(n_features, 3)}, got {D.shape}"
assert Lambda.shape == (3, n_samples), f"Expected Lambda shape {(3, n_samples)}, got {Lambda.shape}"

# 2) Every entry must be >= 0 (a NaN entry fails this comparison as well).
assert np.all(D >= 0), "D contains negative values"
assert np.all(Lambda >= 0), "Lambda contains negative values"

# 3) Each column of D and of Lambda must sum to 1 within an absolute tolerance of 1e-4.
d_col_sums = D.sum(axis=0)
np.testing.assert_allclose(d_col_sums, 1.0, atol=1e-4, err_msg="D columns do not sum to 1")
lambda_col_sums = Lambda.sum(axis=0)
np.testing.assert_allclose(lambda_col_sums, 1.0, atol=1e-4, err_msg="Lambda columns do not sum to 1")

print("Julia notebook analog steps completed successfully!")
