In [None]:
cdef double k_sample_test(np.ndarray[double, ndim=2] X, np.ndarray[double, ndim=1] y, str score_func="mgc"):
    """Nonparametric `K`-Sample Testing test statistic.

    A k-sample test tests equality in distribution among groups. Groups
    can be of different sizes, but must have the same dimensionality.
    This implementation reduces the k-sample testing to an
    independence testing problem, and leverages notable and powerful
    multivariate independence tests.

    Read more in the :ref:`User Guide <multivariate_feature_selection>`.

    Parameters
    ----------
    X : ndarray of double, shape (n_samples, n_features)
        Sample vectors.
    y : ndarray of double, shape (n_samples,)
        The target vector. Each distinct value defines one group.
    score_func : str, default="mgc"
        Name of the multivariate independence test to use. The default and
        only existing test is multiscale graph correlation; the argument is
        currently not consulted and MGC is always run.

    Returns
    -------
    stat : double
        The computed k-sample test statistic (the MGC statistic between the
        group-ordered samples and their one-hot group labels).

    Notes
    -----
    1. The k-sample testing problem can be thought of as a generalization of
    the two sample testing problem.

    2. By manipulating the inputs of the k-sample test, we create
    concatenated versions of the inputs and a label matrix which are
    paired. Then, any multivariate nonparametric test can be performed on
    this data.

    3. Multivariate feature selection uses k-sample test score function to
    calculate a test statistic for each feature not already selected as a
    best feature. For each feature in that sub-section, inputted is a data
    matrix with best features selected and that additional feature.

    References
    ----------
    .. [1] Sambit Panda, Cencheng Shen, Ronan Perry, Jelle Zorn, Antoine Lutz,
           Carey E. Priebe, and Joshua T. Vogelstein. Nonpar MANOVA via
           Independence Testing. arXiv:1910.08883 [cs, stat], April 2021.

    """
    # All C-level locals are declared (and the lists initialised) up front;
    # Cython requires the ``cdef`` keyword and does not allow declarations
    # inside loops or ``with`` blocks.
    # ``y`` holds doubles, so ``np.unique`` yields a sorted 1-D double array.
    cdef np.ndarray[double, ndim=1] groups = np.unique(y)
    cdef Py_ssize_t n_groups = groups.shape[0]
    cdef Py_ssize_t j
    cdef list matrices = []
    cdef list vs = []
    cdef np.ndarray[double, ndim=2] xi
    cdef np.ndarray[double, ndim=2] encode
    cdef double stat

    for j in range(n_groups):
        # extract data matrix of shape (_samples, _features) for this group;
        # X is typed as 2-D, so plain row indexing is always sufficient
        xi = X[np.where(y == groups[j])[0]]
        matrices.append(xi)

        # one hot encode the group label for the multivariate independence
        # test: column ``j`` is 1 for every row that belongs to group ``j``
        encode = np.zeros((xi.shape[0], n_groups))
        encode[:, j] = 1.0
        vs.append(encode)

    # Separate names are used for the stacked arrays: the label matrix is
    # 2-D and cannot be bound to ``y``, whose buffer type is 1-D.
    stacked = np.concatenate(matrices)
    labels = np.concatenate(vs)

    # default, which is mgc case; reps=0 requests no permutation replications,
    # and only the statistic is read from the result
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        mgc = multiscale_graphcorr(stacked, labels, reps=0)
    # NOTE(review): newer SciPy documentation names this attribute
    # ``statistic`` - confirm ``stat`` against the SciPy version in use.
    stat = mgc.stat
    return stat