<a href="https://colab.research.google.com/github/Lupin3-droid/Lupin3-droid/blob/main/non_parametric_tests.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#generating random numbers.
from numpy.random import seed
from numpy.random import randn
from numpy import mean
from numpy import std
# Pin the global NumPy RNG so every run draws the same observations.
seed(4)
# Two univariate normal samples of 100 points each: spread 5, centres 50 and 51.
data1 = 50 + 5 * randn(100)
data2 = 51 + 5 * randn(100)
# Report the sample mean and (population, ddof=0) standard deviation of each set.
print(
    'data1: mean=%.3f stdv=%.3f'
    % (mean(data1), std(data1))
)
print(
    'data2: mean=%.3f stdv=%.3f'
    % (mean(data2), std(data2))
)

data1: mean=50.176 stdv=4.865
data2: mean=51.109 stdv=4.671


In [None]:
#Mann-Whitney U-Test/wilcoxon-mann whitney rank test.
from scipy.stats import mannwhitneyu
# Pin the global NumPy RNG so the samples are reproducible.
seed(4)
# Two independent normal samples (spread 5) whose centres differ by one unit.
data1 = 50 + 5 * randn(100)
data2 = 51 + 5 * randn(100)
# Mann-Whitney U statistic and its p-value for the two samples.
stat, p = mannwhitneyu(data1, data2)
print('Statistics=%.3f, p=%.3f' % (stat, p))
# Decision at the 5% significance level.
alpha = 0.05
print('Same distribution (fail to reject H0)' if p > alpha else 'Different distribution (reject H0)')

Statistics=4531.000, p=0.252
Same distribution (fail to reject H0)


In [None]:
#Wilcoxon signed-rank test.
from scipy.stats import wilcoxon
# Pin the global NumPy RNG so the samples are reproducible.
seed(4)
# Two equally sized normal samples; the signed-rank test below pairs them
# element by element (data1[i] with data2[i]).
data1 = 50 + 5 * randn(100)
data2 = 51 + 5 * randn(100)
# Wilcoxon signed-rank statistic and p-value for the paired differences.
stat, p = wilcoxon(data1, data2)
print('Statistics=%.3f, p=%.3f' % (stat, p))
# Decision at the 5% significance level.
alpha = 0.05
print('Same distribution (fail to reject H0)' if p > alpha else 'Different distribution (reject H0)')

Statistics=2124.000, p=0.168
Same distribution (fail to reject H0)


In [None]:
#Kruskal-Wallis test.
from numpy.random import seed
from numpy.random import randn
from scipy.stats import kruskal
# Pin the global NumPy RNG so the samples are reproducible.
seed(4)
# Three independent normal samples (spread 5) centred at 50, 51 and 52.
data1 = 50 + 5 * randn(100)
data2 = 51 + 5 * randn(100)
data3 = 52 + 5 * randn(100)
# Kruskal-Wallis H statistic and its p-value across the three groups.
stat, p = kruskal(data1, data2, data3)
print('Statistics=%.3f, p=%.3f' % (stat, p))
# Decision at the 5% significance level.
alpha = 0.05
print('Same distributions (fail to reject H0)' if p > alpha else 'Different distributions (reject H0)')

Statistics=12.294, p=0.002
Different distributions (reject H0)


In [None]:
#test for randomness based on median.
from statsmodels.sandbox.stats.runs import runstest_1samp
from numpy.random import seed
from numpy.random import randn
# Pin the global NumPy RNG so the generated data is reproducible.
seed(4)
# Create the dataset: 100 normal draws with spread 5 centred at 52.
data = 5 * randn(100) + 52
alpha = 0.05
# Perform the runs test, splitting observations above/below the MEDIAN.
# The cutoff is passed explicitly because runstest_1samp otherwise splits
# around the mean, which is not what this cell is meant to demonstrate.
test = runstest_1samp(data, cutoff='median', correction=False)

# test is a (z statistic, p-value) pair; only the p-value is printed.
print(test[1])
if test[1] > alpha:
	print('the data is produced in a random manner(fail to reject H0)')
else:
	print('data is not produced in a random manner (reject H0)')


0.07528063097161039
the data is produced in a random manner(fail to reject H0)


In [None]:
#test for randomness based on runs using python.
import scipy.stats as stats

def runs_test(sequence):
    """Runs test for randomness of a binary (0/1) sequence.

    Uses the normal approximation: the observed number of runs is compared
    with its expectation and variance under the hypothesis that the order of
    zeros and ones is random.

    Parameters
    ----------
    sequence : iterable of 0/1 values
        The ordered observations. Any iterable is accepted; it is copied
        into a list before counting.

    Returns
    -------
    dict or str
        A dict with keys ``runs``, ``expected_runs``, ``variance_runs``,
        ``z_score``, ``p_value`` (two-sided) and ``is_random``
        (``p_value > 0.05``). If the sequence does not contain both a 0 and
        a 1, a descriptive message string is returned instead.

    Raises
    ------
    ValueError
        If the sequence contains both symbols but also values other than
        0 and 1, because the run statistics would then be meaningless.
    """
    sequence = list(sequence)
    n = len(sequence)
    n0, n1 = sequence.count(0), sequence.count(1)

    if n1 == 0 or n0 == 0:
        return "Sequence is not random: Only one type of digit present."

    if n0 + n1 != n:
        raise ValueError("sequence must contain only 0 and 1 values")

    # A new run starts at every position whose value differs from its predecessor.
    runs = sum(1 for i in range(1, n) if sequence[i] != sequence[i - 1]) + 1

    # Expected number of runs and its variance under randomness.
    expected_runs = (2 * n1 * n0) / n + 1
    variance_runs = (2 * n1 * n0 * ((2 * n1 * n0) - n1 - n0)) / (n**2 * (n - 1))

    if variance_runs > 0:
        z = (runs - expected_runs) / (variance_runs**0.5)
        p_value = 2 * (1 - stats.norm.cdf(abs(z)))
    else:
        # Only happens for n0 == n1 == 1, where the run count is always 2 and
        # equals its expectation: there is no evidence against randomness.
        z = 0.0
        p_value = 1.0

    return {
        "runs": runs,
        "expected_runs": expected_runs,
        "variance_runs": variance_runs,
        "z_score": z,
        "p_value": p_value,
        "is_random": p_value > 0.05  # True if p-value > 0.05
    }

# Example usage
sequence = [1,1,1,0,1,0,0,0,1,0,1,0,1,1,1,0,0,0,1,0,0,1,0,0,1,1,1,0,1,0,1,1,0,0,0,1,0,1,0,0,0,1,0,1]  # Replace with your sequence
result = runs_test(sequence)

print("Runs Test Results:")
if isinstance(result, dict):
    for k, v in result.items():
        print(f"{k}: {v}")
else:
    # runs_test returns a plain message (not a dict) when the sequence
    # holds only one kind of digit; print it instead of calling .items().
    print(result)


Runs Test Results:
runs: 27
expected_runs: 22.954545454545453
variance_runs: 10.698779550259466
z_score: 1.2368020783245357
p_value: 0.21616057031920777
is_random: True


In [None]:
#Wilcoxon signed-rank test.
"""Researchers want to know if a new fuel treatment leads to a change in the average mpg of a certain car.
To test this, they measure the mpg of 12 cars with and without the fuel treatment."""
import scipy.stats as stats
# mpg of the same 12 cars without (group1) and with (group2) the fuel treatment.
group1 = [20, 23, 21, 25, 18, 17, 18, 24, 20, 24, 23, 19]
group2 = [24, 25, 21, 22, 23, 18, 17, 28, 24, 27, 21, 23]
# Significance level, defined here so the cell runs on its own instead of
# relying on an `alpha` left behind by an earlier cell.
alpha = 0.05
#perform the Wilcoxon-Signed Rank Test
test=stats.wilcoxon(group1, group2)

# test[1] is the p-value of the signed-rank test.
print(test[1])
if test[1] > alpha:
	print('The mpg is equal between the two groups(fail to reject H0)')
else:
	print('The mpg is not equal between the two groups (reject H0)')


0.044065400736826854
The mpg is not equal between the two groups (reject H0)


In [None]:
#Two-sample Wald-Wolfowitz runs test.
import numpy as np
from scipy.stats import norm

def wald_wolfowitz_test(sample1, sample2):
    """Two-sample Wald-Wolfowitz runs test (normal approximation).

    The two samples are pooled and sorted; each observation is replaced by
    the label of the sample it came from, and the number of runs in that
    label sequence is compared with its expectation under the hypothesis
    that both samples come from the same distribution.

    Parameters
    ----------
    sample1, sample2 : sequence of numbers
        The two samples. Each must contain at least one observation.

    Returns
    -------
    dict
        Keys ``runs``, ``expected_runs``, ``variance_runs``, ``z_score``,
        ``p_value`` (two-sided) and ``is_same_distribution``
        (``p_value > 0.05``).

    Raises
    ------
    ValueError
        If either sample is empty.

    Notes
    -----
    Values tied across the two samples are ordered by label (sample2
    before sample1) because the (value, label) pairs are sorted as tuples;
    with many cross-sample ties the run count depends on that convention.
    """
    # Sizes of the two samples
    n1, n2 = len(sample1), len(sample2)
    if n1 == 0 or n2 == 0:
        raise ValueError("Both samples must contain at least one observation.")

    # Combine samples and assign labels (1 = sample1, 0 = sample2)
    combined = np.concatenate([sample1, sample2])
    labels = [1] * n1 + [0] * n2

    # Sort combined array and reorder labels
    sorted_labels = [label for _, label in sorted(zip(combined, labels))]

    # Count runs: a new run starts wherever the label changes
    runs = sum(sorted_labels[i] != sorted_labels[i - 1] for i in range(1, len(sorted_labels))) + 1

    # Expected runs and variance
    total = n1 + n2
    expected_runs = (2 * n1 * n2) / total + 1
    variance_runs = (2 * n1 * n2 * (2 * n1 * n2 - n1 - n2)) / (total**2 * (total - 1))

    # Z-score and p-value
    if variance_runs > 0:
        z = (runs - expected_runs) / (variance_runs**0.5)
        p_value = 2 * (1 - norm.cdf(abs(z)))
    else:
        # Only happens for n1 == n2 == 1: the run count is then always 2 and
        # equals its expectation, so the data carry no evidence either way.
        z = 0.0
        p_value = 1.0

    return {
        "runs": runs,
        "expected_runs": expected_runs,
        "variance_runs": variance_runs,
        "z_score": z,
        "p_value": p_value,
        "is_same_distribution": p_value > 0.05
    }

# Demo: compare two samples of 20 observations each.
sample1 = [
    82, 15, 4, 95, 36, 32, 29, 18, 95, 14,
    87, 95, 70, 12, 76, 55, 5, 4, 12, 28,
]
sample2 = [
    30, 65, 78, 4, 72, 26, 92, 84, 90, 70,
    54, 29, 58, 76, 36, 1, 98, 21, 90, 55,
]

result = wald_wolfowitz_test(sample1, sample2)

# One "key: value" line per entry of the result dict, in insertion order.
print("Wald-Wolfowitz Test Results:")
print(*(f"{k}: {v}" for k, v in result.items()), sep="\n")


Wald-Wolfowitz Test Results:
runs: 23
expected_runs: 21.0
variance_runs: 9.743589743589743
z_score: 0.6407232755171874
p_value: 0.5217024890869293
is_same_distribution: True


In [None]:
#one sample sign test.
from scipy.stats import binom
import numpy as np # Import numpy

def one_sample_sign_test(sample, hypothesized_median):
    """Two-sided one-sample sign test for a hypothesized median.

    Counts how many observations lie above and below the hypothesized
    median (observations equal to it are ignored) and compares the count
    of positive signs with a Binomial(n, 0.5) distribution.

    Parameters
    ----------
    sample : sequence of numbers
        The observations.
    hypothesized_median : number
        Median value under the null hypothesis.

    Returns
    -------
    dict or str
        A dict with keys ``positive``, ``negative``, ``p_value`` (two-sided,
        never larger than 1) and ``is_median_equal`` (``p_value > 0.05``).
        If every observation equals the hypothesized median, a message
        string is returned instead.
    """
    # Signed deviations from the hypothesized median.
    differences = np.array(sample) - hypothesized_median

    # Count positive and negative signs (ignoring zero differences)
    positive = int(np.sum(differences > 0))
    negative = int(np.sum(differences < 0))

    n = positive + negative  # Number of non-zero differences
    if n == 0:
        return "All differences are zero, cannot perform the test."

    # Two-sided p-value from the binomial CDF: twice the smaller tail.
    p = 0.5  # The probability of success (null hypothesis)
    lower_tail = binom.cdf(positive, n, p)            # P(X <= positive)
    upper_tail = 1 - binom.cdf(positive - 1, n, p)    # P(X >= positive)
    # Both tails include the observed count, so doubling the smaller one can
    # exceed 1 when the split is (nearly) even; cap it at 1.
    p_value = min(1.0, float(2 * min(lower_tail, upper_tail)))

    return {
        "positive": positive,
        "negative": negative,
        "p_value": p_value,
        "is_median_equal": p_value > 0.05  # True if p-value > 0.05
    }

# Example usage
sample = [30, 65, 78, 4, 72, 26, 92, 84, 90, 70, 54, 29, 58, 76, 36, 1, 98, 21, 90, 55] # Sample data
hypothesized_median = 70      # Hypothesized median value

result = one_sample_sign_test(sample, hypothesized_median)

print("One-Sample Sign Test Results:")
if isinstance(result, dict):
    for k, v in result.items():
        print(f"{k}: {v}")
else:
    # one_sample_sign_test returns a plain message (not a dict) when every
    # difference is zero; print it instead of calling .items().
    print(result)

One-Sample Sign Test Results:
positive: 8
negative: 11
p_value: 0.6476058959960938
is_median_equal: True


In [None]:
from scipy.stats import binom
import numpy as np

def paired_sample_sign_test(sample1, sample2):
    """Two-sided sign test for paired samples.

    Counts the positive and negative element-wise differences
    ``sample1 - sample2`` (zero differences are ignored) and compares the
    count of positive signs with a Binomial(n, 0.5) distribution.

    Parameters
    ----------
    sample1, sample2 : sequence of numbers
        Paired observations; both must have the same length.

    Returns
    -------
    dict
        Keys ``positive_differences``, ``negative_differences``,
        ``p_value`` (two-sided, never larger than 1) and ``is_significant``
        (``p_value < 0.05``). When every difference is zero the p-value is
        reported as 1.0, i.e. no evidence of a difference.

    Raises
    ------
    ValueError
        If the two samples do not have the same shape.
    """
    first = np.asarray(sample1)
    second = np.asarray(sample2)
    if first.shape != second.shape:
        # Without this check a length-1 sample would silently broadcast.
        raise ValueError("sample1 and sample2 must have the same length for a paired test.")

    # Compute the differences between the paired samples
    differences = first - second

    # Count positive and negative differences
    positive = int(np.sum(differences > 0))
    negative = int(np.sum(differences < 0))

    # Number of non-zero differences
    non_zero_differences = positive + negative

    p = 0.5  # The probability of success (null hypothesis)
    if non_zero_differences == 0:
        p_value = 1.0
    else:
        # Two-sided p-value from the binomial CDF: twice the smaller tail.
        lower_tail = binom.cdf(positive, non_zero_differences, p)
        upper_tail = 1 - binom.cdf(positive - 1, non_zero_differences, p)
        # Both tails include the observed count, so the doubled value can
        # exceed 1 for a (nearly) even split; cap it at 1.
        p_value = min(1.0, float(2 * min(lower_tail, upper_tail)))

    # Return the results
    return {
        "positive_differences": positive,
        "negative_differences": negative,
        "p_value": p_value,
        "is_significant": p_value < 0.05  # Test at 5% significance level
    }

# Demo: 24 paired observations.
sample1 = [
    52, 93, 15, 72, 61, 21, 83, 87, 75, 75, 88, 24,
    3, 22, 53, 2, 88, 30, 38, 2, 64, 60, 21, 33,
]
sample2 = [
    76, 58, 22, 89, 49, 91, 59, 42, 92, 60, 80, 15,
    62, 62, 47, 62, 51, 55, 64, 3, 51, 7, 21, 73,
]

result = paired_sample_sign_test(sample1, sample2)

# One "key: value" line per entry of the result dict, in insertion order.
print("Paired Sample Sign Test Results:")
print(*(f"{k}: {v}" for k, v in result.items()), sep="\n")


Paired Sample Sign Test Results:
positive_differences: 11
negative_differences: 12
p_value: 1.0
is_significant: False


In [None]:
#spearmans rank test.
from scipy.stats import spearmanr
import numpy as np

# Example data: two paired series of 24 observations.
x = [52, 93, 15, 72, 61, 21, 83, 87, 75, 75, 88, 24, 3, 22, 53, 2, 88, 30, 38, 2, 64, 60, 21, 33]
y = [76, 58, 22, 89, 49, 91, 59, 42, 92, 60, 80, 15, 62, 62, 47, 62, 51, 55, 64, 3, 51, 7, 21, 73]

# Perform Spearman's rank correlation test
correlation, p_value = spearmanr(x, y)

# Output results
print(f"Spearman's rank correlation coefficient: {correlation}")
print(f"P-value: {p_value}")
# Decision at the 5% significance level (H0: no monotonic association).
alpha=0.05
if p_value > alpha:
	print('The data is not correlated(fail to reject H0)')
else:
	print('The data is correlated (reject H0)')

Spearman's rank correlation coefficient: 0.15966020853591623
P-value: 0.456140331434012
The data is not correlated(fail to reject H0)
