In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt
import seaborn as sns

# Mann-Whitney U Test

In [2]:
import numpy as np
from scipy import stats

def mann_whitney_u(A, B, alpha=0.05):
    """Two-sided Mann-Whitney U test using the normal approximation.

    Both samples are pooled and ranked together (tied values share their
    average rank).  The reported statistic is ``min(U1, U2)``; it is
    standardised with a tie-corrected standard deviation and no continuity
    correction.  A short report is printed and the key numbers are returned.

    Parameters
    ----------
    A, B : array-like
        One-dimensional independent samples; they may differ in length.
    alpha : float, optional
        Significance level used only for the printed decision.

    Returns
    -------
    tuple
        ``(U, p_value)``.

    Raises
    ------
    ValueError
        If either sample is empty.
    """
    n1, n2 = len(A), len(B)
    if n1 == 0 or n2 == 0:
        raise ValueError("mann_whitney_u requires two non-empty samples")

    pooled = np.concatenate((A, B))
    ranks = stats.rankdata(pooled)
    R1 = np.sum(ranks[:n1])
    R2 = np.sum(ranks[n1:])

    U1 = (n1 * n2) + (n1 * (n1 + 1) / 2) - R1
    U2 = (n1 * n2) + (n2 * (n2 + 1) / 2) - R2
    U = min(U1, U2)

    N = n1 + n2
    mu_U = (n1 * n2) / 2
    # Mid-ranks shrink the variance of U; subtract the usual sum(t^3 - t)
    # term, where t is the size of each group of tied values.
    _, tie_sizes = np.unique(pooled, return_counts=True)
    tie_term = np.sum(tie_sizes**3 - tie_sizes)
    var_U = (n1 * n2 / 12) * ((N + 1) - tie_term / (N * (N - 1)))

    if var_U > 0:
        Z = (U - mu_U) / np.sqrt(var_U)
        # sf() keeps precision in the tail where 1 - cdf() rounds to 0.
        p_value = 2 * stats.norm.sf(abs(Z))
    else:
        # Every pooled value is identical: U equals its mean, no evidence.
        Z, p_value = 0.0, 1.0

    print(f"Mann-Whitney U Statistic: {U}")
    print(f"Z-score: {Z:.4f}")
    print(f"P-value: {p_value:.4f}")

    if p_value < alpha:
        print("Reject the null hypothesis: Significant difference detected.")
    else:
        print("Fail to reject the null hypothesis: No significant difference detected.")

    return U, p_value

# Wilcoxon Signed-Rank Test

In [3]:
def WilcoxonSR(A, B, alpha=0.05):
    """Two-sided Wilcoxon signed-rank test using the normal approximation.

    The paired differences ``A - B`` are computed, zero differences are
    discarded, and the absolute values of the remaining differences are
    ranked (ties share their average rank).  ``W`` is the smaller of the
    positive and negative rank sums; it is standardised with a
    tie-corrected standard deviation and no continuity correction.
    A one-line report and the decision are printed.

    Parameters
    ----------
    A, B : array-like
        Paired numeric observations (lists or arrays).
    alpha : float, optional
        Significance level used only for the printed decision.

    Returns
    -------
    tuple
        ``(W, p)``.  When no non-zero difference remains, ``W`` is 0 and
        ``p`` is 1.0.
    """
    d = np.asarray(A, dtype=float) - np.asarray(B, dtype=float)
    # Zero differences carry no sign, so they must not take up ranks or
    # count toward n; otherwise |Z| is inflated.
    d = d[d != 0]
    n = len(d)

    ad = np.abs(d)
    r = stats.rankdata(ad)
    Wp, Wn = np.sum(r[d > 0]), np.sum(r[d < 0])
    W = min(Wp, Wn)

    mu = n * (n + 1) / 4
    # Tie correction: subtract sum(t^3 - t) / 48 over groups of equal |d|.
    _, tie_sizes = np.unique(ad, return_counts=True)
    var = n * (n + 1) * (2 * n + 1) / 24 - np.sum(tie_sizes**3 - tie_sizes) / 48

    if var > 0:
        Z = (W - mu) / np.sqrt(var)
        # W is the smaller rank sum, so Z <= 0 and 2 * cdf(Z) is two-sided.
        p = 2 * stats.norm.cdf(Z)
    else:
        # No usable differences: nothing speaks against H0.
        Z, p = 0.0, 1.0

    print(f"Z = {Z:.4f}, P = {p:.4f}, W+ = {Wp}, W- = {Wn}, W = {W}")
    print("Reject H0" if p < alpha else "Fail to reject H0")
    return W, p

# Kruskal-Wallis Test (non-parametric equivalent of one-way ANOVA)

In [4]:
def KruskalWallis(A, B, C, alpha=0.05):
    """Kruskal-Wallis H test for three independent samples.

    All observations are pooled and ranked (ties share their average
    rank), the ranks are summed per group, and H is computed from those
    rank sums.  H is divided by the standard tie-correction factor and
    compared with a chi-square distribution with 2 degrees of freedom.
    The statistic, p-value and decision are printed.

    Parameters
    ----------
    A, B, C : array-like
        One-dimensional numeric samples; they may differ in length.
    alpha : float, optional
        Significance level used only for the printed decision.

    Returns
    -------
    tuple
        ``(H, p)``.  When every pooled value is identical, ``H`` is 0 and
        ``p`` is 1.0.

    Raises
    ------
    ValueError
        If any sample is empty.
    """
    groups = [np.asarray(g, dtype=float) for g in (A, B, C)]
    sizes = np.array([len(g) for g in groups])
    if np.any(sizes == 0):
        raise ValueError("KruskalWallis requires three non-empty samples")

    data = np.concatenate(groups)
    N = len(data)
    rnk = stats.rankdata(data)

    # Ranks are in concatenation order, so each group is a contiguous slice.
    offsets = np.concatenate(([0], np.cumsum(sizes)))
    R = np.array([rnk[lo:hi].sum() for lo, hi in zip(offsets[:-1], offsets[1:])])

    H = (12 / (N * (N + 1))) * np.sum(R**2 / sizes) - 3 * (N + 1)

    # Tie correction: H / (1 - sum(t^3 - t) / (N^3 - N)).
    _, tie_sizes = np.unique(data, return_counts=True)
    correction = 1 - np.sum(tie_sizes**3 - tie_sizes) / (N**3 - N)

    if correction > 0:
        H = H / correction
        p = stats.chi2.sf(H, len(groups) - 1)
    else:
        # All observations are equal, so the groups cannot be told apart.
        H, p = 0.0, 1.0

    print(f"H = {H:.4f}, p = {p:.4f}")
    print("Reject H0" if p < alpha else "Fail to reject H0")
    return H, p


# Friedman Test

In [5]:
def Friedman(A, B, C, alpha=0.05):
    """Friedman chi-square test for three repeated measurements.

    The i-th elements of ``A``, ``B`` and ``C`` are treated as one block
    (row).  Values are ranked within each row (ties share their average
    rank), the ranks are summed per treatment, and

        Q = 12 / (n * k * (k + 1)) * sum(R_j ** 2) - 3 * n * (k + 1)

    is computed, divided by the standard tie-correction factor, and
    compared with a chi-square distribution with ``k - 1`` degrees of
    freedom.  The statistic, p-value and decision are printed.

    Parameters
    ----------
    A, B, C : array-like
        Equal-length one-dimensional numeric samples.
    alpha : float, optional
        Significance level used only for the printed decision.

    Returns
    -------
    tuple
        ``(Q, p)``.  When every row is completely tied, ``Q`` is 0 and
        ``p`` is 1.0.

    Raises
    ------
    ValueError
        If the samples are empty or differ in length.
    """
    cols = [np.asarray(g, dtype=float) for g in (A, B, C)]
    if len({len(c) for c in cols}) != 1 or len(cols[0]) == 0:
        raise ValueError("Friedman requires three equal-length, non-empty samples")

    d = np.array(cols).T  # rows = blocks, columns = treatments
    n, k = d.shape
    r = np.apply_along_axis(stats.rankdata, 1, d)
    R = np.sum(r, axis=0)

    # The rank sums are squared and summed as-is; dividing them by n here
    # (as in the Kruskal-Wallis formula) would make Q wrong.
    Q = (12 / (n * k * (k + 1))) * np.sum(R**2) - 3 * n * (k + 1)

    # Tie correction: Q / (1 - sum(t^3 - t) / (n * k * (k^2 - 1))), with the
    # sum taken over tie groups inside every row.
    ties = 0
    for row in d:
        _, counts = np.unique(row, return_counts=True)
        ties += np.sum(counts**3 - counts)
    correction = 1 - ties / (n * k * (k**2 - 1))

    if correction > 0:
        Q = Q / correction
        p = stats.chi2.sf(Q, k - 1)
    else:
        # Every row is fully tied: treatments are indistinguishable.
        Q, p = 0.0, 1.0

    print(f"Q: {Q:.4f}, p: {p:.4f}")
    print("Reject H0" if p < alpha else "Fail to reject H0")
    return Q, p