In [31]:
from scipy.stats import multiscale_graphcorr
from sklearn.preprocessing import OneHotEncoder

def k_sample_test(X, y, score_func="MGC"):
    """Compute the multivariate independence test statistic for the provided sample.

    Read more in the :ref:`User Guide <multivariate_feature_selection>`.

    The input layout is chosen by comparing shapes:

    * ``X.shape == y.shape``: ``X`` and ``y`` are treated as two separate
      samples and handed to ``multiscale_graphcorr`` with ``is_twosamp=True``.
    * otherwise: ``y`` is treated as a vector of class labels, reshaped to a
      single column, one-hot encoded, and tested against ``X``.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        The set of regressors that will be tested sequentially. Must expose
        a ``.shape`` attribute.
    y : ndarray of shape (n_samples,), or of the same shape as ``X``
        The target vector of class labels, or a second sample when its shape
        equals that of ``X``.
    score_func : str, default="MGC"
        Name of the k_sample multivariate independence test. Multiscale graph
        correlation is the only test implemented here, so any other value
        also falls back to it.

    Returns
    -------
    stat : float
        The computed k_sample test statistic.
    pvalue : float
        The computed k_sample pvalue.
    """
    # ``score_func`` does not change the computation: MGC is the only test
    # available, so every value resolves to the same scipy call.
    # ``workers=-1`` asks scipy to parallelise over all available cores.
    if X.shape == y.shape:
        result = multiscale_graphcorr(X, y, workers=-1, is_twosamp=True)
    else:
        # One indicator column per distinct label; the encoder returns a
        # sparse matrix, which is densified before being passed to scipy.
        label_column = y.reshape(-1, 1)
        indicators = OneHotEncoder().fit_transform(label_column).toarray()
        result = multiscale_graphcorr(X, indicators, workers=-1)
    return result.stat, result.pvalue

In [68]:
import numpy as np
import pytest
from numpy.testing import assert_almost_equal, assert_raises
from hyppo.ksample import KSample
from hyppo.tools import rot_ksamp

def k_sample_equals_rot(n, p):
    """Assert that ``k_sample_test`` agrees with hyppo's ``KSample("MGC")``.

    Both implementations receive the same pair of arrays produced by
    ``rot_ksamp("linear", n, p, k=2)``; only the test statistics are
    compared (to one decimal place), the p-values are ignored.

    Parameters
    ----------
    n, p : int
        Forwarded to ``rot_ksamp`` (presumably the number of samples and the
        number of features -- confirm against hyppo's documentation).

    Returns
    -------
    None
        Raises ``AssertionError`` if the statistics disagree.
    """
    # Fixed seed so the simulated data is the same on every run.
    np.random.seed(123456789)
    sample_a, sample_b = rot_ksamp("linear", n, p, k=2)

    # Reference value from hyppo, then the value under test.
    expected_stat, _, _ = KSample("MGC").test(sample_a, sample_b)
    observed_stat, _ = k_sample_test(sample_a, sample_b, "MGC")

    assert_almost_equal(expected_stat, observed_stat, decimal=1)

def k_sample_equals_rot_2(n, p):
    """Assert agreement with hyppo when ``k_sample_test`` gets class labels.

    hyppo's ``KSample("MGC")`` receives the two arrays produced by
    ``rot_ksamp("linear", n, p, k=2)`` directly, while ``k_sample_test``
    receives them stacked row-wise together with a 0/1 label vector marking
    which block each row came from. Only the test statistics are compared
    (to one decimal place).

    Parameters
    ----------
    n, p : int
        Forwarded to ``rot_ksamp`` (presumably the number of samples and the
        number of features -- confirm against hyppo's documentation).

    Returns
    -------
    None
        Raises ``AssertionError`` if the statistics disagree.
    """
    # Fixed seed so the simulated data is the same on every run.
    np.random.seed(123456789)
    sample_a, sample_b = rot_ksamp("linear", n, p, k=2)

    # Pool both samples and label the rows: 0 for the first block, 1 for the
    # second. The label count is taken from the first sample for both blocks,
    # so this assumes the two samples have the same number of rows.
    rows_per_group = sample_a.shape[0]
    pooled = np.concatenate([sample_a, sample_b], axis=0)
    group_labels = np.repeat(np.array([0, 1], dtype=int), rows_per_group)

    # Reference value from hyppo, then the value under test.
    expected_stat, _, _ = KSample("MGC").test(sample_a, sample_b)
    observed_stat, _ = k_sample_test(pooled, group_labels, "MGC")

    assert_almost_equal(expected_stat, observed_stat, decimal=1)




In [69]:
# Low samples, low features: first the two-sample input layout, then the
# pooled-data + class-label layout passed to k_sample_test.
for check in (k_sample_equals_rot, k_sample_equals_rot_2):
    print(check(50, 2))

None
None


In [70]:
# Low samples, medium features: first the two-sample input layout, then the
# pooled-data + class-label layout passed to k_sample_test.
for check in (k_sample_equals_rot, k_sample_equals_rot_2):
    print(check(50, 80))

None
None


In [71]:
# Low samples, high features: first the two-sample input layout, then the
# pooled-data + class-label layout passed to k_sample_test.
for check in (k_sample_equals_rot, k_sample_equals_rot_2):
    print(check(50, 1000))

None
None


In [72]:
# Medium samples, high features: first the two-sample input layout, then the
# pooled-data + class-label layout passed to k_sample_test.
for check in (k_sample_equals_rot, k_sample_equals_rot_2):
    print(check(100, 1000))

None
None
