In [1]:
import numpy as np

In [2]:
# Wasserstein distance
def Wasserstein_Dist(XX, YY):
    '''
    Wasserstein distance between two one-dimensional samples.

    The two samples are pooled and sorted. Walking through the pooled points,
    the running empirical CDF of each sample is updated, and for every gap
    between consecutive pooled points the absolute CDF difference (height)
    is multiplied by the gap length (width). The sum of these rectangles is
    returned, i.e. the area between the two empirical CDFs.

    XX: The first input vector. It should be a numpy array with length of n.
    YY: The second input vector. It should be a numpy array with length of m.

    Returns: the accumulated area (non-negative number).
    Raises: ZeroDivisionError if either input is empty (1/len is evaluated).
    '''
    nx = len(XX)
    ny = len(YY)
    n = nx + ny

    # Pool the samples; X2/Y2 hold the CDF jump each pooled point contributes
    # to the XX-CDF and the YY-CDF respectively (0 for the other sample).
    XY = np.concatenate([XX, YY])
    X2 = np.concatenate([np.repeat(1/nx, nx), np.repeat(0, ny)])
    Y2 = np.concatenate([np.repeat(0, nx), np.repeat(1/ny, ny)])

    S_Ind = np.argsort(XY)
    XY_Sorted = XY[S_Ind]
    X2_Sorted = X2[S_Ind]
    Y2_Sorted = Y2[S_Ind]

    Res = 0
    E_CDF = 0
    F_CDF = 0
    power = 1  # exponent applied to the CDF gap; 1 leaves it unchanged

    # n pooled points define n-1 gaps, so ii must run over 0 .. n-2.
    # (The previous range(0, n-2) skipped the gap before the largest point.)
    for ii in range(n - 1):
        E_CDF = E_CDF + X2_Sorted[ii]
        F_CDF = F_CDF + Y2_Sorted[ii]
        height = abs(F_CDF - E_CDF)
        width = XY_Sorted[ii+1] - XY_Sorted[ii]  # 0 for tied points
        Res = Res + (height ** power) * width

    return Res

In [3]:
# Kuiper distance
def Kuiper_Dist(XX, YY):
    '''
    Kuiper distance between two one-dimensional samples.

    The samples are pooled and sorted, and the running empirical CDFs are
    compared. The signed gap (YY-CDF minus XX-CDF) is read only at pooled
    points that differ from their successor, so tied values are consumed
    together before the gap is evaluated. The result is the largest positive
    gap plus the magnitude of the largest negative gap.

    XX: The first input vector (array-like, length n).
    YY: The second input vector (array-like, length m).

    Returns: |min gap| + |max gap| (non-negative number).
    Raises: ZeroDivisionError if either input is empty (1/len is evaluated).
    '''
    nx = len(XX)
    ny = len(np.array(YY))
    n = nx + ny

    # Pool the samples; X2/Y2 hold the CDF jump each pooled point contributes.
    XY = np.concatenate([XX, YY])
    X2 = np.concatenate([np.repeat(1/nx, nx), np.repeat(0, ny)])
    Y2 = np.concatenate([np.repeat(0, nx), np.repeat(1/ny, ny)])

    S_Ind = np.argsort(XY)
    XY_Sorted = XY[S_Ind]
    X2_Sorted = X2[S_Ind]
    Y2_Sorted = Y2[S_Ind]

    up = 0      # largest positive value of F_CDF - E_CDF seen so far
    down = 0    # most negative value of F_CDF - E_CDF seen so far
    E_CDF = 0
    F_CDF = 0
    height = 0  # stays at its previous value while walking through ties
    power = 1   # exponent applied to each extreme; 1 leaves it unchanged

    # n pooled points define n-1 gaps, so ii must run over 0 .. n-2.
    # (The previous range(0, n-2) never evaluated the gap before the
    # largest pooled point.)
    for ii in range(n - 1):
        E_CDF = E_CDF + X2_Sorted[ii]
        F_CDF = F_CDF + Y2_Sorted[ii]
        if XY_Sorted[ii+1] != XY_Sorted[ii]:
            height = F_CDF - E_CDF
        if height > up:
            up = height
        if height < down:
            down = height

    K_Dist = abs(down)**power + abs(up)**power

    return K_Dist

In [4]:
# Anderson-Darling distance
def Anderson_Darling_Dist(XX, YY):
    '''
    Anderson-Darling style distance between two one-dimensional samples.

    The samples are pooled and sorted. At every pooled point that differs
    from its successor, the absolute gap between the two running empirical
    CDFs is divided by SD = sqrt(n * G * (1 - G)), where G is the running
    CDF of the pooled sample and n the pooled size, and the weighted gaps
    are summed.

    NOTE(review): with power = 1 the weighted gap enters linearly; the
    classical Anderson-Darling statistic uses squared gaps - confirm that
    the linear form is intended.

    XX: The first input vector (array-like, length n).
    YY: The second input vector (array-like, length m).

    Returns: the sum of weighted CDF gaps (non-negative number).
    Raises: ZeroDivisionError if either input is empty (1/len is evaluated).
    '''
    nx = len(XX)
    ny = len(np.array(YY))
    n = nx + ny

    # Pool the samples; X2/Y2 hold the CDF jump each pooled point contributes.
    XY = np.concatenate([XX, YY])
    X2 = np.concatenate([np.repeat(1/nx, nx), np.repeat(0, ny)])
    Y2 = np.concatenate([np.repeat(0, nx), np.repeat(1/ny, ny)])

    S_Ind = np.argsort(XY)
    XY_Sorted = XY[S_Ind]
    X2_Sorted = X2[S_Ind]
    Y2_Sorted = Y2[S_Ind]

    Res = 0
    E_CDF = 0
    F_CDF = 0
    G_CDF = 0  # running CDF of the pooled sample
    power = 1  # exponent applied to the weighted gap; 1 leaves it unchanged

    # n pooled points define n-1 gaps, so ii must run over 0 .. n-2.
    # (The previous range(0, n-2) dropped the term before the largest point.)
    for ii in range(n - 1):
        E_CDF = E_CDF + X2_Sorted[ii]
        F_CDF = F_CDF + Y2_Sorted[ii]
        G_CDF = G_CDF + 1/n
        SD = (n * G_CDF * (1 - G_CDF))**0.5
        height = abs(F_CDF - E_CDF)
        # Skip tied points, and guard the division against a zero weight.
        if XY_Sorted[ii+1] != XY_Sorted[ii] and SD > 0:
            Res = Res + (height/SD)**power

    return Res

In [5]:
# Cramer-von Mises distance
def CVM_Dist(XX, YY):
    '''
    Cramer-von Mises style distance between two one-dimensional samples.

    The samples are pooled and sorted. At every pooled point that differs
    from its successor, the absolute gap between the two running empirical
    CDFs is added to the result.

    NOTE(review): with power = 1 the gaps are summed linearly; the classical
    Cramer-von Mises statistic uses squared gaps - confirm that the linear
    form is intended.

    XX: The first input vector (array-like, length n).
    YY: The second input vector (array-like, length m).

    Returns: the sum of absolute CDF gaps (non-negative number).
    Raises: ZeroDivisionError if either input is empty (1/len is evaluated).
    '''
    nx = len(XX)
    ny = len(YY)
    n = nx + ny

    # Pool the samples; X2/Y2 hold the CDF jump each pooled point contributes.
    XY = np.concatenate([XX, YY])
    X2 = np.concatenate([np.repeat(1/nx, nx), np.repeat(0, ny)])
    Y2 = np.concatenate([np.repeat(0, nx), np.repeat(1/ny, ny)])

    S_Ind = np.argsort(XY)
    XY_Sorted = XY[S_Ind]
    X2_Sorted = X2[S_Ind]
    Y2_Sorted = Y2[S_Ind]

    Res = 0
    E_CDF = 0
    F_CDF = 0
    power = 1  # exponent applied to the CDF gap; 1 leaves it unchanged

    # n pooled points define n-1 gaps, so ii must run over 0 .. n-2.
    # (The previous range(0, n-2) dropped the term before the largest point.)
    for ii in range(n - 1):
        E_CDF = E_CDF + X2_Sorted[ii]
        F_CDF = F_CDF + Y2_Sorted[ii]
        height = abs(F_CDF - E_CDF)
        if XY_Sorted[ii+1] != XY_Sorted[ii]:  # tied points are skipped
            Res = Res + height**power

    return Res

In [6]:
def Kolmogorov_Smirnov_Dist(XX, YY):
    '''
    Kolmogorov-Smirnov distance between two one-dimensional samples.

    The samples are pooled and sorted, and the running empirical CDFs are
    compared. The absolute gap is read only at pooled points that differ
    from their successor, so tied values are consumed together before the
    gap is evaluated. The largest gap found is returned.

    XX: The first input vector (array-like, length n).
    YY: The second input vector (array-like, length m).

    Returns: the maximum absolute CDF gap (non-negative number).
    Raises: ZeroDivisionError if either input is empty (1/len is evaluated).
    '''
    nx = len(XX)
    ny = len(YY)
    n = nx + ny

    # Pool the samples; X2/Y2 hold the CDF jump each pooled point contributes.
    XY = np.concatenate([XX, YY])
    X2 = np.concatenate([np.repeat(1/nx, nx), np.repeat(0, ny)])
    Y2 = np.concatenate([np.repeat(0, nx), np.repeat(1/ny, ny)])

    S_Ind = np.argsort(XY)
    XY_Sorted = XY[S_Ind]
    X2_Sorted = X2[S_Ind]
    Y2_Sorted = Y2[S_Ind]

    Res = 0
    height = 0  # stays at its previous value while walking through ties
    E_CDF = 0
    F_CDF = 0
    power = 1   # exponent applied to the maximum gap; 1 leaves it unchanged

    # n pooled points define n-1 gaps, so ii must run over 0 .. n-2.
    # (The previous range(0, n-2) never evaluated the gap before the
    # largest pooled point, which can be where the maximum occurs.)
    for ii in range(n - 1):
        E_CDF = E_CDF + X2_Sorted[ii]
        F_CDF = F_CDF + Y2_Sorted[ii]
        if XY_Sorted[ii+1] != XY_Sorted[ii]:
            height = abs(F_CDF - E_CDF)
        if height > Res:
            Res = height

    KS_Dist = Res**power

    return KS_Dist