In [1]:
import torch
import numpy as np
import scipy.stats
from scipy.stats import rankdata

In [None]:
# y is a J x N matrix (one row per unit vector, one column per function)
# z is a 1-D vector of length J (one threshold per row of y)
# returns a 1-D vector of length J
def compute_f_Hat(y, z):
    """
    Row-wise empirical CDF: for every row of y, the proportion of its entries
    that are less than or equal to the matching entry of z.

    Parameters:
    y (JxN matrix): One row per unit vector (J) and one column per function (N).
    z (length-J vector): 1-D vector holding the threshold for each row of y.

    Returns:
    np.ndarray: Length-J vector; entry j is (number of y[j, :] <= z[j]) / N.
    """
    # Turn z into a column so each threshold is broadcast across its own row of y.
    thresholds = z[:, np.newaxis]
    n_columns = y.shape[1]

    at_or_below = y <= thresholds
    row_counts = np.sum(at_or_below, axis=1)
    return row_counts / n_columns

# arr represents a single function evaluated at the points of a grid, 1 x M (where M is the number of points in the grid)
# Xn represents the matrix of all functions, N x M (where N is the number of functions)
# U represents the matrix of random numbers pulled from the normal distribution with shape J x M
# where J is the number of unit vectors and M is the number of grid points
# This function returns the depth of the function arr with respect to all other functions in the data set

def depth(arr, xn, u):
    """
    Depth of a single functional observation relative to a sample of functions.

    Both arr and every row of xn are projected onto each row of u. For each
    direction j, F_j is the fraction of sample projections that are <= the
    projection of arr (see compute_f_Hat). The depth is the average over the
    J directions of F_j * (1 - F_j).

    Parameters:
    arr (length-M np.array): One functional observation on the M-point grid.
    xn (NxM np.array): All N functional observations (one per row).
    u (JxM np.array): The J projection directions (one per row).

    Returns:
    float: A single scalar, the depth of arr with respect to the rows of xn.
    """
    n_directions = u.shape[0]

    # Row j holds the projections of all N sample functions onto direction j (J x N).
    sample_projections = np.dot(xn, u.T).T
    # Projection of the function of interest onto every direction (length J).
    arr_projections = np.dot(u, arr)

    f_hat = compute_f_Hat(sample_projections, arr_projections)
    return np.dot(f_hat, 1 - f_hat) / n_directions



# We might need to add a flag for how the rank method is calculated
def KW_H_Test(functionalDataSet: np.ndarray, UnitVectorMatrix: np.ndarray,
              groups: np.ndarray, alpha: float = 0.05):
    """
    Depth-based Kruskal-Wallis H test for functional data.

    Each function (row of functionalDataSet) is reduced to a scalar depth with
    `depth`, the depths are ranked over the whole sample, and the
    Kruskal-Wallis H statistic is computed from the per-group rank sums:

        H = 12 / (N (N + 1)) * sum_g (R_g ** 2 / n_g) - 3 (N + 1)

    H is then compared with the chi-square critical value with
    (number of groups - 1) degrees of freedom at significance level alpha.

    Parameters:
    functionalDataSet (NxM np.ndarray): One functional observation per row.
    UnitVectorMatrix (JxM np.ndarray): Projection directions handed to `depth`.
    groups (length-N np.ndarray): Group label of each row of functionalDataSet.
    alpha (float): Significance level of the test; defaults to 0.05.

    Returns:
    bool-like: True when H exceeds the chi-square critical value (the groups
    differ at level alpha), False otherwise.

    Raises:
    ValueError: If groups is not 1-D with one label per row of
        functionalDataSet, or if alpha is not strictly between 0 and 1.
    """
    groups = np.asarray(groups)
    n_total = np.shape(functionalDataSet)[0]
    if groups.ndim != 1 or groups.size != n_total:
        # A shorter label vector used to give a silently wrong statistic and a
        # longer one an IndexError; fail loudly and early instead.
        raise ValueError(
            "groups must be a 1-D array with one label per row of functionalDataSet "
            f"(expected {n_total} labels, got shape {groups.shape})"
        )
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha}")

    # apply_along_axis hands `depth` each 1-D slice taken *along* axis 1, i.e.
    # one complete row (a single function of length M) per call, so the result
    # is a length-N vector holding one depth per function.
    depthVector = np.apply_along_axis(depth, axis=1, arr=functionalDataSet,
                                      xn=functionalDataSet, u=UnitVectorMatrix)

    # Ranks 1..N of the depths. rankdata's default gives tied depths their
    # average rank; note that no tie correction is applied to H below.
    rankVector = rankdata(depthVector)

    # inverse[i] is the index into `unique` of the group of observation i, so
    # bincount with the ranks as weights yields the rank sum R_g of every group.
    unique, inverse, counts = np.unique(groups, return_inverse=True, return_counts=True)
    rankSums = np.bincount(np.ravel(inverse), weights=rankVector, minlength=unique.size)

    summation = np.sum(rankSums ** 2 / counts)
    hStat = 12 / (n_total * (n_total + 1)) * summation - (3 * (n_total + 1))

    degreeOfFreedom = unique.size - 1
    chiSquare = scipy.stats.chi2.ppf(1 - alpha, df=degreeOfFreedom)
    return hStat > chiSquare


In [None]:
# Seeded generator so that Restart & Run All reproduces the same synthetic data and test outcome.
rng = np.random.default_rng(0)

Xn = rng.random((100, 10))  # synthetic functional data: 100 functions on a 10-point grid

# Projection directions: drawn from the standard normal and scaled to unit length,
# rather than uniform [0, 1) draws, which only cover the positive orthant.
U = rng.standard_normal((20, 10))
U /= np.linalg.norm(U, axis=1, keepdims=True)

groups = rng.integers(1, 11, size=Xn.shape[0])  # labels 1..10, one per row of Xn
temp = KW_H_Test(Xn, U, groups)