In [1]:
from warnings import warn
from numpy import mean, transpose, cov, cos, sin, shape, exp, newaxis, concatenate
from numpy.linalg import linalg, LinAlgError, solve
from scipy.stats import chi2


#testing purposes
from numpy.random import seed
import numpy 
import abc

In [2]:
def _gen_random(dimension, num_random_features):
    """Draw a matrix of standard-normal samples used as random frequencies.

    The draw comes from NumPy's global random state, so it is repeatable only
    when that state has been seeded beforehand.

    Parameters
    ----------
    dimension : int
        Number of rows of the returned matrix.
    num_random_features : int
        Number of columns of the returned matrix.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(dimension, num_random_features)``.
    """
    frequency_shape = (dimension, num_random_features)
    return numpy.random.randn(*frequency_shape)

def smooth(data):
    """Compute a Gaussian weight ``exp(-||row||^2 / 2)`` for every row of ``data``.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array; the Euclidean norm is taken along axis 1 (one norm per row).

    Returns
    -------
    numpy.ndarray
        Column vector of shape ``(data.shape[0], 1)`` so that it broadcasts
        against per-row feature matrices.
    """
    # Go through the public numpy.linalg namespace: the `numpy.linalg.linalg`
    # submodule is a private implementation detail that is deprecated in
    # NumPy 2.0 and warns on attribute access.
    row_norms = numpy.linalg.norm(data, axis=1)
    weights = exp(-row_norms ** 2 / 2)
    # Add a trailing axis: (n,) -> (n, 1).
    return weights[:, newaxis]

def smooth_cf(data, w, random_frequencies):
    """Build weighted sine/cosine features of ``data`` projected on random frequencies.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array of shape ``(n, d)``.
    w : numpy.ndarray or scalar
        Weights multiplied element-wise into the sine and cosine features;
        must broadcast against an ``(n, k)`` array (e.g. shape ``(n, 1)``).
    random_frequencies : numpy.ndarray
        2-D array of shape ``(d, k)``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 2 * k)``: the ``k`` weighted sine columns followed
        by the ``k`` weighted cosine columns.

    Raises
    ------
    ValueError
        If ``data`` or ``random_frequencies`` is not 2-dimensional.
    """
    # Explicit rank checks replace the previous unused tuple-unpacking of
    # `.shape`, keeping the ValueError but with a readable message.
    if len(data.shape) != 2:
        raise ValueError(
            "data must be 2-dimensional, got shape %r" % (data.shape,))
    if len(random_frequencies.shape) != 2:
        raise ValueError(
            "random_frequencies must be 2-dimensional, got shape %r"
            % (random_frequencies.shape,))

    projections = data.dot(random_frequencies)
    return concatenate((sin(projections) * w, cos(projections) * w), axis=1)

In [3]:
# Build the two samples X and Y for the two-sample test. With the scaling line
# below left commented out, both are drawn by numpy.random.randn with the same
# shape, so there is no injected difference between them.
# NOTE(review): the earlier "p-value < 0.05" remark presumably described a run
# with `X[:, 1] *= 3` enabled -- confirm; the p-value printed at the end of this
# notebook for the current settings is well above 0.05.
seed(120)  # fix NumPy's global random state so the draws below are repeatable

scale = 1
num_samples = 500
dimensions = 10
X = numpy.random.randn(num_samples, dimensions)
# Disabled perturbation: uncommenting multiplies X's column at index 1 by 3.
#X[:, 1] *= 3
Y = numpy.random.randn(num_samples, dimensions)
X.shape, Y.shape

((500, 10), (500, 10))

In [4]:
# Set up the test inputs: apply the global scale factor to both samples and
# choose how many random frequencies to draw.
data_x, data_y = scale*X, scale*Y
num_random_features = 5

# Both samples must have the same number of columns because a single frequency
# matrix is applied to each of them further down.
_, dimension_x = numpy.shape(data_x)
_, dimension_y = numpy.shape(data_y)
assert dimension_x == dimension_y
# Consumes NumPy's global random state, so the result depends on the seed set
# earlier and on how many draws happened in between.
random_frequencies = _gen_random(dimension_x, num_random_features)
random_frequencies.shape

(10, 5)

In [5]:
# Weight every row of each sample, then build the weighted sin/cos features
# using the same random frequencies for both samples.
x_smooth, y_smooth = smooth(data_x), smooth(data_y)
characteristic_fxn_x = smooth_cf(data_x, x_smooth, random_frequencies)
characteristic_fxn_y = smooth_cf(data_y, y_smooth, random_frequencies)
# Element-wise difference: row i of the X features minus row i of the Y features.
smooth_diff = characteristic_fxn_x - characteristic_fxn_y

# transpose() puts the feature columns on rows, which is the layout cov() treats
# as "one variable per row"; sigma is therefore feature-by-feature.
sigma = cov(transpose(smooth_diff))
mu = mean(smooth_diff, 0)  # mean over rows (axis 0): one value per feature column
# num_samples * mu^T sigma^{-1} mu, using solve() instead of an explicit inverse.
stat = num_samples * mu.dot(solve(sigma, mu.T)) #compute test statistic
# Degrees of freedom 2*num_random_features: smooth_cf emits one sin and one cos
# column per random frequency.
pval = chi2.sf(stat, 2*num_random_features) #convert to p-value

print(pval)

0.9063755836469662
