In [3]:
import numpy as np
import scipy.stats as sp

def sliced_wasserstein_distance(X, Y, L=100):
    """
    Computes the sliced Wasserstein distance (SWD_p) between two sets of samples.
    
    Parameters:
    - X: numpy array of shape (N, d) -> first sample set
    - Y: numpy array of shape (N, d) -> second sample set
    - L: int, number of random projections
    - p: int, order of Wasserstein distance (default: 1)
    
    Returns:
    - SWD_p: float, the sliced Wasserstein distance
    """
    N, d = X.shape  # Assuming X and Y have the same shape
    S = 0  # Accumulation variable

    for _ in range(L):
        # Sample a random unit vector (projection direction)
        theta = np.random.randn(d)
        theta /= np.linalg.norm(theta)  # Normalize to unit sphere

        # Compute projections
        alpha = X @ theta
        beta = Y @ theta

        # Compute 1D Wasserstein distance
        W_i = sp.wasserstein_distance(alpha, beta)

        print(W_i)

        # Accumulate
        S += W_i

    # Compute final SWD
    SWD_p = (S / L) 
    
    return SWD_p


In [47]:
# Generate example data
np.random.seed(42)
N, d = 100, 1  # 100 samples, 2D data

# Generate two Gaussian-distributed sample sets
X = np.random.normal(loc=0, scale=1, size=(N, d))
Y = np.random.normal(loc=5.6, scale=1, size=(N, d))  # Shifted mean

# Compute SWD
swd_result = sliced_wasserstein_distance(X, Y, L=5)
swd_result

5.726151104444018
5.726151104444018
5.726151104444018
5.726151104444018
5.726151104444018


5.726151104444018