In [8]:
from scipy.stats import multiscale_graphcorr
from sklearn.preprocessing import OneHotEncoder

def k_sample_test(X, y, score_func="MGC"):
    """Compute a multivariate k-sample test statistic and p-value.

    Read more in the :ref:`User Guide <multivariate_feature_selection>`.

    The test is always scipy's multiscale graph correlation (MGC). Two input
    layouts are handled:

    * ``X.shape == y.shape``: ``X`` and ``y`` are passed to MGC as two samples
      (``is_twosamp=True``).
    * otherwise: ``y`` is treated as a vector of class labels, one-hot encoded,
      and MGC is run between ``X`` and the encoded labels.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        The observations. Must expose ``.shape``.
    y : ndarray
        Either a second sample with the same shape as ``X``, or a label
        vector of shape (n_samples,). Must expose ``.shape`` and ``.reshape``.
    score_func : str, default="MGC"
        Name of the k-sample test to run. Currently ignored: MGC is the only
        test used, whatever value is passed.

    Returns
    -------
    stat : float
        The computed test statistic.
    pvalue : float
        The computed p-value.
    """
    if X.shape == y.shape:
        # Same-shaped inputs are interpreted as two samples to compare.
        test = multiscale_graphcorr(X, y, workers=-1, is_twosamp=True)
    else:
        # Labels become one indicator column per class; densify because the
        # encoder's output is converted with ``toarray()`` before testing.
        encoded_labels = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()
        test = multiscale_graphcorr(X, encoded_labels, workers=-1)
    return test.stat, test.pvalue

In [43]:
import numpy as np
import pytest
from numpy.testing import assert_almost_equal, assert_raises
from hyppo.ksample import KSample
from hyppo.tools import rot_ksamp
# Checks that k_sample_test agrees with hyppo's KSample("MGC") on two samples
# drawn by hyppo's rot_ksamp. Every public check takes (n, p), forwards them
# to rot_ksamp, raises AssertionError on disagreement and returns None.

_ROT_SEED = 123456789


def _draw_two_samples(sim, n, p):
    """Seed NumPy's global RNG and draw the two samples for simulation `sim`."""
    np.random.seed(_ROT_SEED)
    first, second = rot_ksamp(sim, n, p, k=2)
    return first, second


def _reference_stat(first, second):
    """Return hyppo's MGC k-sample statistic for the two samples."""
    # Index instead of unpacking so the check does not depend on how many
    # values the test returns after the statistic.
    return KSample("MGC").test(first, second)[0]


def _check_two_sample(sim, n, p):
    """Compare the statistics when both samples are passed to k_sample_test as-is."""
    first, second = _draw_two_samples(sim, n, p)
    true_stat = _reference_stat(first, second)
    attempt_stat = k_sample_test(first, second, "MGC")[0]
    assert_almost_equal(true_stat, attempt_stat, decimal=1)


def _check_labelled(sim, n, p):
    """Compare the statistics when k_sample_test gets stacked data plus 0/1 labels."""
    first, second = _draw_two_samples(sim, n, p)
    stacked = np.concatenate((first, second), axis=0)
    # Label rows from the first sample 0 and rows from the second sample 1.
    labels = np.concatenate(
        (
            np.zeros((first.shape[0],), dtype=int),
            np.ones((second.shape[0],), dtype=int),
        )
    )
    true_stat = _reference_stat(first, second)
    attempt_stat = k_sample_test(stacked, labels, "MGC")[0]
    assert_almost_equal(true_stat, attempt_stat, decimal=1)


# Linear - two-sample case
def k_sample_equals_rot(n, p):
    """Linear simulation, samples passed to k_sample_test as two arrays."""
    _check_two_sample("linear", n, p)


# Linear - one-hot encoding case
def k_sample_equals_rot_2(n, p):
    """Linear simulation, stacked samples with class labels."""
    _check_labelled("linear", n, p)


# Exponential - one-hot encoding case
def k_sample_equals_rot_exp_2(n, p):
    """Exponential simulation, stacked samples with class labels."""
    _check_labelled("exponential", n, p)


# Cubic - one-hot encoding case
def k_sample_equals_rot_cub_2(n, p):
    """Cubic simulation, stacked samples with class labels."""
    _check_labelled("cubic", n, p)


# Joint normal - one-hot encoding case
def k_sample_equals_rot_norma_2(n, p):
    """Joint-normal simulation, stacked samples with class labels."""
    _check_labelled("joint_normal", n, p)


# Step - one-hot encoding case
def k_sample_equals_rot_step_2(n, p):
    """Step simulation, stacked samples with class labels."""
    _check_labelled("step", n, p)


# Quadratic - one-hot encoding case
def k_sample_equals_rot_quad_2(n, p):
    """Quadratic simulation, stacked samples with class labels."""
    _check_labelled("quadratic", n, p)


# W-shaped - one-hot encoding case
def k_sample_equals_rot_w_2(n, p):
    """W-shaped simulation, stacked samples with class labels."""
    _check_labelled("w_shaped", n, p)


# Spiral - one-hot encoding case
def k_sample_equals_rot_spiral_2(n, p):
    """Spiral simulation, stacked samples with class labels."""
    _check_labelled("spiral", n, p)


# Uncorrelated Bernoulli - one-hot encoding case
def k_sample_equals_rot_bernoulli_2(n, p):
    """Uncorrelated-Bernoulli simulation, stacked samples with class labels."""
    _check_labelled("uncorrelated_bernoulli", n, p)


# Logarithmic - one-hot encoding case
def k_sample_equals_rot_log_2(n, p):
    """Logarithmic simulation, stacked samples with class labels."""
    _check_labelled("logarithmic", n, p)


# Fourth root - one-hot encoding case
def k_sample_equals_rot_fourth_2(n, p):
    """Fourth-root simulation, stacked samples with class labels."""
    _check_labelled("fourth_root", n, p)


# Square - one-hot encoding case
def k_sample_equals_rot_square_2(n, p):
    """Square simulation, stacked samples with class labels."""
    _check_labelled("square", n, p)


# Two parabolas - one-hot encoding case
def k_sample_equals_rot_parab_2(n, p):
    """Two-parabolas simulation, stacked samples with class labels."""
    _check_labelled("two_parabolas", n, p)


# Circle - one-hot encoding case
def k_sample_equals_rot_circle_2(n, p):
    """Circle simulation, stacked samples with class labels."""
    _check_labelled("circle", n, p)


# Ellipse - one-hot encoding case
def k_sample_equals_rot_ellipse_2(n, p):
    """Ellipse simulation, stacked samples with class labels."""
    _check_labelled("ellipse", n, p)


# Diamond - one-hot encoding case
def k_sample_equals_rot_diamond_2(n, p):
    """Diamond simulation, stacked samples with class labels."""
    _check_labelled("diamond", n, p)


# Multiplicative noise - one-hot encoding case
def k_sample_equals_rot_noise_2(n, p):
    """Multiplicative-noise simulation, stacked samples with class labels."""
    _check_labelled("multiplicative_noise", n, p)


# Multimodal independence - one-hot encoding case
def k_sample_equals_rot_ind_2(n, p):
    """Multimodal-independence simulation, stacked samples with class labels."""
    _check_labelled("multimodal_independence", n, p)

In [69]:
# Linear simulation, 50 samples x 2 features (low / low).
# First check: both samples handed to k_sample_test as two arrays.
print(k_sample_equals_rot(n=50, p=2))
# Second check: hyppo still gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_2(n=50, p=2))

None
None


In [70]:
# Linear simulation, 50 samples x 80 features (low / medium).
# First check: both samples handed to k_sample_test as two arrays.
print(k_sample_equals_rot(n=50, p=80))
# Second check: hyppo still gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_2(n=50, p=80))

None
None


In [71]:
# Linear simulation, 50 samples x 1000 features (low / high).
# First check: both samples handed to k_sample_test as two arrays.
print(k_sample_equals_rot(n=50, p=1000))
# Second check: hyppo still gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_2(n=50, p=1000))

None
None


In [72]:
# Linear simulation, 100 samples x 1000 features (medium / high).
# First check: both samples handed to k_sample_test as two arrays.
print(k_sample_equals_rot(n=100, p=1000))
# Second check: hyppo still gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_2(n=100, p=1000))

None
None


In [10]:
# Exponential simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_exp_2(n=50, p=1000))

None


In [13]:
# Cubic simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_cub_2(n=50, p=1000))

None


In [18]:
# Joint-normal simulation, 50 samples x 5 features (low / low);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
# Feature count kept small: the covariance matrix is not calculable with p > 10.
print(k_sample_equals_rot_norma_2(n=50, p=5))

None


In [20]:
# Step simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_step_2(n=50, p=1000))

None


In [23]:
# Quadratic simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_quad_2(n=50, p=1000))

None


In [25]:
# W-shaped simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_w_2(n=50, p=1000))

None


In [29]:
# Spiral simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_spiral_2(n=50, p=1000))

None


In [32]:
# Uncorrelated-Bernoulli simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_bernoulli_2(n=50, p=1000))

None


In [33]:
# Logarithmic simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_log_2(n=50, p=1000))

None


In [34]:
# Fourth-root simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_fourth_2(n=50, p=1000))

None


In [35]:
# Square simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_square_2(n=50, p=1000))

None


In [38]:
# Two-parabolas simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_parab_2(n=50, p=1000))

None


In [39]:
# Circle simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_circle_2(n=50, p=1000))

None


In [40]:
# Ellipse simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_ellipse_2(n=50, p=1000))

None


In [41]:
# Diamond simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_diamond_2(n=50, p=1000))

None


In [44]:
# Multiplicative-noise simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_noise_2(n=50, p=1000))

None


In [45]:
# Multimodal-independence simulation, 50 samples x 1000 features (low / high);
# hyppo gets two arrays, k_sample_test gets stacked data + class labels.
print(k_sample_equals_rot_ind_2(n=50, p=1000))

None
