In [2]:
import numpy as np
import pandas as pd

def ks_test(X1, X2, flag=1):
    """
    Compute a Kolmogorov-Smirnov-style distance between two samples.

    The empirical CDF of each sample is evaluated at every value of the
    pooled sample (duplicates included), and the absolute differences
    between the two CDFs are reduced according to ``flag``.

    Parameters
    ----------
    X1 : array-like or scalar
        First sample (numpy array, list, pandas Series, or a single number).
        Multi-dimensional input is flattened.
    X2 : array-like or scalar
        Second sample; accepts the same kinds of input as ``X1``.
    flag : int, default 1
        1 = classical KS (maximum distance between the CDFs)
        2 = total KS (mean absolute difference between the CDFs, averaged
            over all pooled values)

    Returns
    -------
    float
        KS statistic selected by ``flag``, a value in [0, 1].

    Raises
    ------
    ValueError
        If ``flag`` is not 1 or 2, or if either sample is empty.
    """
    # Reject an unsupported flag before doing any work.
    if flag not in (1, 2):
        raise ValueError("Flag must be 1 (classical KS) or 2 (total KS)")

    # Flatten so scalars, lists, Series and N-d arrays are all treated as
    # 1D samples, then sort once so the CDFs can be read off by binary search.
    X1 = np.sort(np.asarray(X1).ravel())
    X2 = np.sort(np.asarray(X2).ravel())

    # An empty sample has no empirical CDF (it would be a 0/0 division).
    if X1.size == 0 or X2.size == 0:
        raise ValueError("X1 and X2 must each contain at least one value")

    # Evaluation grid: every observed value from both samples.
    all_values = np.concatenate([X1, X2])

    # side="right" returns, for each grid value, how many sample elements
    # are <= that value, i.e. the numerator of the empirical CDF.
    cdf1 = np.searchsorted(X1, all_values, side="right") / X1.size
    cdf2 = np.searchsorted(X2, all_values, side="right") / X2.size

    # Pointwise absolute gap between the two empirical CDFs.
    differences = np.abs(cdf1 - cdf2)

    if flag == 1:
        # Classical KS: maximum distance
        return np.max(differences)
    # Total KS: mean absolute difference (flag == 2, validated above)
    return np.mean(differences)

In [6]:
# Sanity check with two scalar inputs: each is treated as a one-element
# sample, and two distinct single values give the maximum distance of 1.0.
ks_test(X1=100, X2=33)

np.float64(1.0)