MAD Statistics

In [None]:
# Place MAD data into SBS groups

In [None]:
import numpy as np
from scipy import stats
from scipy.stats import mannwhitneyu, kruskal, wilcoxon, kstest, anderson
import math

# Function to calculate Cliff's Delta
def cliffs_delta(x, y):
    """Return Cliff's delta, a non-parametric effect size for two samples.

    Delta is ``(#(x_i > y_j) - #(x_i < y_j)) / (len(x) * len(y))`` taken over
    every cross-sample pair, so it ranges from -1 (every ``x`` value is below
    every ``y`` value) to +1 (every ``x`` value is above every ``y`` value).
    Tied pairs, and pairs involving NaN, contribute zero to the numerator.

    Args:
        x: One-dimensional sequence of numbers (first sample).
        y: One-dimensional sequence of numbers (second sample).

    Returns:
        The statistic as a Python ``float``.

    Raises:
        ValueError: If either sample is empty, because the statistic is
            undefined without at least one cross-sample pair.
    """
    x, y = np.asarray(x), np.asarray(y)
    if x.size == 0 or y.size == 0:
        raise ValueError("cliffs_delta requires two non-empty samples")

    # Broadcast to a (len(x), len(y)) comparison grid so the pair counting
    # runs inside NumPy instead of a Python-level double loop.
    x_column = x[:, np.newaxis]
    greater = np.count_nonzero(x_column > y)
    less = np.count_nonzero(x_column < y)
    return float(greater - less) / (x.size * y.size)

# Function to calculate Cohen's d
def cohens_d(x, y):
    """Return Cohen's d, the standardized difference between two sample means.

    The difference ``mean(x) - mean(y)`` is divided by the pooled sample
    standard deviation::

        s_pooled = sqrt((SS_x + SS_y) / (n_x + n_y - 2))

    where ``SS`` is each sample's sum of squared deviations from its own
    mean.  Pooling on ``n_x + n_y - 2`` degrees of freedom weights each
    group by its size and uses the unbiased variance estimate, unlike an
    unweighted average of population (``ddof=0``) variances.

    Args:
        x: One-dimensional sequence of numbers (first sample).
        y: One-dimensional sequence of numbers (second sample).

    Returns:
        The effect size as a NumPy float.  NaN is returned when the statistic
        is undefined because a sample is empty or there are fewer than three
        observations in total.  If both samples have zero variance the
        division yields ``inf``/NaN with a NumPy runtime warning.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    n_x, n_y = x.size, y.size
    degrees_of_freedom = n_x + n_y - 2
    if n_x == 0 or n_y == 0 or degrees_of_freedom < 1:
        return np.float64("nan")

    mean_x, mean_y = x.mean(), y.mean()
    # Work from sums of squares rather than per-group variances so that a
    # group holding a single observation still contributes (zero) cleanly.
    ss_x = np.sum((x - mean_x) ** 2)
    ss_y = np.sum((y - mean_y) ** 2)
    pooled_std = np.sqrt((ss_x + ss_y) / degrees_of_freedom)
    return (mean_x - mean_y) / pooled_std

# Function to calculate Hedges' g
def hedges_g(x, y):
    """Return Hedges' g: Cohen's d scaled by a small-sample correction.

    The value of ``cohens_d(x, y)`` is multiplied by
    ``1 - 3 / (4 * df - 1)`` with ``df = len(x) + len(y) - 2``.

    Args:
        x: First sample; must support ``len()``.
        y: Second sample; must support ``len()``.

    Returns:
        The corrected effect size.
    """
    effect = cohens_d(x, y)
    degrees_of_freedom = len(x) + len(y) - 2
    small_sample_correction = 1 - 3 / (4 * degrees_of_freedom - 1)
    return effect * small_sample_correction

# Function to run statistical tests and compare
def compare_data(data_1, label_1, data_2, label_2, *, alpha=0.05):
    """Compare two samples with several tests and effect-size measures.

    Every hypothesis test whose p-value is below ``alpha``, and every effect
    size whose magnitude exceeds its cut-off, is printed to stdout and
    collected in the returned list.

    Hypothesis tests run:
        * Mann-Whitney U (two-sided)
        * Kruskal-Wallis
        * Wilcoxon signed-rank, only when both samples have the same length
        * two-sample Kolmogorov-Smirnov
        * k-sample Anderson-Darling

    Effect sizes reported: Cliff's delta, Cohen's d and Hedges' g.

    Args:
        data_1: One-dimensional sequence of numbers for the first group.
        label_1: Display name of the first group, used in printed messages.
        data_2: One-dimensional sequence of numbers for the second group.
        label_2: Display name of the second group, used in printed messages.
        alpha: Significance level applied to every hypothesis test.

    Returns:
        A list of strings of the form ``"<name>: <value>"`` with the value
        formatted to four decimals, in the order the checks were run.
    """
    significant_differences = []

    def report(name, value):
        # Single place that both records a finding and echoes it.
        significant_differences.append(f"{name}: {value:.4f}")
        print(f"Significant difference between {label_1} and {label_2} with {name}: {value:.4f}")

    # Mann-Whitney U test.  The alternative is spelled out because older
    # SciPy releases did not default to a two-sided test.
    _, p_value = mannwhitneyu(data_1, data_2, alternative="two-sided")
    if p_value < alpha:
        report("Mann-Whitney U test", p_value)

    # Kruskal-Wallis test
    _, p_value = kruskal(data_1, data_2)
    if p_value < alpha:
        report("Kruskal-Wallis test", p_value)

    # Wilcoxon signed-rank test.  It is a paired test; equal length is only a
    # necessary condition, so the result is meaningful only when the caller's
    # observations really are matched element by element.
    if len(data_1) == len(data_2):
        _, p_value = wilcoxon(data_1, data_2)
        if p_value < alpha:
            report("Wilcoxon signed-rank test", p_value)

    # Generalized Wilcoxon-Mann-Whitney test (used in certain specialized cases)
    # This would require a custom implementation or a third-party package, so skipped here

    # Two-sample Kolmogorov-Smirnov test.  No weighted variant is run: SciPy
    # does not provide one, and a second identical KS call under a
    # "weighted" label would only report the same p-value twice.
    _, p_value = stats.ks_2samp(data_1, data_2)
    if p_value < alpha:
        report("Kolmogorov-Smirnov test", p_value)

    # Anderson-Darling k-sample test: are both samples drawn from the same
    # distribution?  (scipy.stats.anderson is a one-sample goodness-of-fit
    # test whose ``significance_level`` holds table levels in percent, not
    # p-values.)  SciPy caps/floors this approximate p-value and may emit a
    # warning when it does so.
    ad_result = stats.anderson_ksamp([data_1, data_2])
    ad_p_value = getattr(ad_result, "pvalue", None)
    if ad_p_value is None:
        # Older SciPy releases expose the p-value under this name instead.
        ad_p_value = ad_result.significance_level
    if ad_p_value < alpha:
        report("Anderson-Darling test", ad_p_value)

    # Effect sizes are magnitudes, not significance tests; they are reported
    # through the same channel when they exceed a conventional cut-off.

    # Cliff's Delta: 0.474 is the conventional lower bound of a "large" effect.
    delta = cliffs_delta(data_1, data_2)
    if abs(delta) > 0.474:
        report("Cliff's Delta", delta)

    # Cohen's d: 0.5 is Cohen's conventional "medium" effect.
    d = cohens_d(data_1, data_2)
    if abs(d) > 0.5:
        report("Cohen's d", d)

    # Hedges' g uses the same scale and cut-off as Cohen's d.
    g = hedges_g(data_1, data_2)
    if abs(g) > 0.5:
        report("Hedges' g", g)

    return significant_differences

# Example usage: two synthetic normal samples of 100 draws each,
# with means 0 and 0.5 and unit standard deviation.
data_1 = np.random.normal(loc=0, scale=1, size=100)
data_2 = np.random.normal(loc=0.5, scale=1, size=100)
label_1, label_2 = 'Group 1', 'Group 2'

# Run the comparison; findings are printed and also returned as a list.
significant_differences = compare_data(
    data_1=data_1, label_1=label_1, data_2=data_2, label_2=label_2
)
