In [1]:
import torch
import numpy as np
import scipy.stats
from scipy.stats import rankdata
from scipy.interpolate import UnivariateSpline

In [9]:
# where y is a matrix of J x N 
# where z is a vector of J x 1 
# returns a J x 1 vector 
def compute_f_Hat(y, z):
    """
    Row-wise empirical CDF: for every row j, the fraction of entries of y[j]
    that are less than or equal to z[j].

    Parameters:
    y (JxN matrix): one row per unit vector, one column per function.
    z (length-J vector): threshold compared against the matching row of y.

    Returns:
    Length-J vector holding, for each row, the proportion of entries <= z.
    """
    # Turn z into a column so it broadcasts across the columns of y.
    thresholds = z[:, None]
    at_or_below = y <= thresholds
    n_columns = y.shape[1]
    return at_or_below.sum(axis=1) / n_columns


# arr represents a single function evaluated at the points of a grid, 1 x M (where M is the number of points in the grid)
# u represents the matrix of random numbers pulled from the normal distribution with shape J x M,
# where J is the number of unit vectors and M is the number of points
# xn_uj represents the projections of Xn (the N x M matrix of all N functions) onto u, with shape J x N
# This function returns the depth of the function arr with respect to all other functions in the data set

def depth(arr, u, xn_uj):
    """
    Depth of one functional observation relative to the whole data set.

    The observation is projected onto every direction in u; for each direction
    the fraction F of data-set projections that are <= that value is computed,
    and the depth is the sum of F * (1 - F) divided by the number of directions.

    Parameters:
    arr (length-M tensor): one functional data point, M being the number of grid points.
    u (JxM tensor): projection directions, one per row.
    xn_uj (JxN tensor): projections of all N functions onto the J directions.

    Returns:
    Scalar tensor with the depth of arr (the caller extracts it with .item()).
    """
    n_directions = u.shape[0]
    projected = torch.matmul(u, arr)
    cdf_at_arr = compute_f_Hat(xn_uj, projected)
    # Dot product of F with (1 - F) sums F_j * (1 - F_j) over the directions.
    spread = torch.matmul(cdf_at_arr, 1 - cdf_at_arr)
    return spread / n_directions


def getDepth(functionalDataSet,UnitVectorMatrix):
    """
    Compute the random-projection depth of every function in the data set.

    Every function (row) is projected onto every direction. For direction j,
    F_j of function i is the fraction of functions whose projection is less
    than or equal to function i's own projection. The depth of function i is
    the mean of F_j * (1 - F_j) over the J directions.

    Parameters:
    functionalDataSet (NxM array-like): N functions sampled on M grid points.
    UnitVectorMatrix (JxM array-like): J projection directions, one per row.

    Returns:
    np.ndarray of shape (N,), dtype float64: depth of each row of functionalDataSet.
    """
    # Cast both inputs to one float dtype: torch.matmul rejects mixed dtypes
    # (for example an integer data set against float directions).
    xn = torch.from_numpy(np.ascontiguousarray(functionalDataSet, dtype=np.float64))
    uj = torch.from_numpy(np.ascontiguousarray(UnitVectorMatrix, dtype=np.float64))

    # Project once and use the same numbers on both sides of the comparison.
    # Recomputing a row's projection with a separate matrix-vector product is
    # not guaranteed to be bit-identical to the matching entry here, which
    # could flip the "<=" comparison of a function against itself.
    xn_uj = torch.matmul(xn, uj.T).T.contiguous()  # J x N
    n_directions, n_functions = xn_uj.shape

    # With each row sorted, searchsorted(..., right=True) gives, for every
    # projection, how many projections in that direction are <= it.
    ordered, _ = torch.sort(xn_uj, dim=1)
    counts = torch.searchsorted(ordered, xn_uj, right=True)
    f_hat = counts.to(xn_uj.dtype) / n_functions

    depth_values = (f_hat * (1 - f_hat)).sum(dim=0) / n_directions
    # Return a numpy array since scipy does not accept tensors.
    return depth_values.numpy()



# We might need to add a flag for how the rank method is calculated
def KW_H_Test(functionalDataSet: np.ndarray, UnitVectorMatrix: np.ndarray,
              groups: np.ndarray, alpha: float = 0.05):
    """
    Kruskal-Wallis H test on the depths of a functional data set.

    The depth of every function is computed with getDepth, the depths are
    ranked (ties receive their average rank), and the tie-corrected H
    statistic is compared against the chi-square critical value with
    (number of groups - 1) degrees of freedom.

    Parameters:
    functionalDataSet (NxM np.ndarray): N functions sampled on M grid points.
    UnitVectorMatrix (JxM np.ndarray): J projection directions.
    groups (length-N np.ndarray): group label of each row of functionalDataSet.
    alpha (float): significance level of the test; defaults to 0.05.

    Returns:
    bool-like: True when H exceeds the critical value, i.e. the hypothesis
    that all groups share the same depth distribution is rejected at level alpha.

    Raises:
    ValueError: if groups does not contain exactly one label per function.
    """
    # Rank the depths, giving an N-vector of ranks.
    depthVector = getDepth(functionalDataSet, UnitVectorMatrix)
    rankVector = rankdata(depthVector)

    groups = np.asarray(groups).ravel()
    if groups.size != rankVector.size:
        raise ValueError(
            "groups must contain one label per function: got "
            f"{groups.size} labels for {rankVector.size} functions"
        )

    # Per-group sample size and sum of ranks, without a Python loop.
    unique, inverse, counts = np.unique(groups, return_inverse=True, return_counts=True)
    rankSums = np.bincount(inverse, weights=rankVector, minlength=unique.size)

    degreeOfFreedom = unique.size - 1
    if degreeOfFreedom < 1:
        # A single group leaves nothing to compare.
        return np.bool_(False)

    n = groups.size
    hStat = 12.0 / (n * (n + 1)) * np.sum(rankSums ** 2 / counts) - 3 * (n + 1)

    # Depths can coincide, so apply the standard tie correction:
    # H / (1 - sum(t^3 - t) / (n^3 - n)), with t the size of each tie group.
    tieFactor = scipy.stats.tiecorrect(rankVector)
    if tieFactor == 0:
        # Every depth is identical, so there is no evidence of a difference.
        return np.bool_(False)
    hStat = hStat / tieFactor

    chiSquare = scipy.stats.chi2.ppf(1 - alpha, df=degreeOfFreedom)
    return hStat > chiSquare

In [10]:
rng = np.random.default_rng(0)  # fixed seed so Restart & Run All reproduces the same result
Xn = rng.random((100, 10)) # represents all the data in points: 100 functions on a 10-point grid
U = rng.standard_normal((20, 10))   # 20 random directions pulled from the normal distribution
U /= np.linalg.norm(U, axis=1, keepdims=True)  # scale every row to unit length so U really holds unit vectors
groups = rng.integers(1, 11, size=100) # where size must be the amount of rows in Xn
temp = KW_H_Test(Xn,U,groups)

In [72]:
# Sample data: abscissae 1..5 and the value observed at each of them
x = np.arange(1, 6)
y = np.array([3, 8, 13, 20, 30])

# Fit a cubic spline; s=0 asks for no smoothing
spline = UnivariateSpline(x, y, k=3, s=0)

# First derivative of the fitted spline, returned as another callable spline
derivative = spline.derivative(n=1)

# Slope of the spline at each of the original sample locations
derivative_values = derivative(x)

print("Approximate Derivative:", derivative_values)

Approximate Derivative: [ 5.75   4.625  5.75   8.375 11.75 ]


In [None]:
# likelihood depth instead of simplicial depth

How to compute multiple row operations at the same time: https://saturncloud.io/blog/pytorch-batch-rowwise-application-of-function/#:~:text=PyTorch%20provides%20an%20efficient%20way%20to%20apply%20a%20function%20row,that%20applies%20to%20each%20row.