In [1]:
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

In [37]:
# Two independent samples of seven observations each, compared by the
# tests in the cells that follow.
x1 = np.array([540, 670, 1000, 960, 1200, 4650, 4200])
x2 = np.array([900, 1300, 4500, 5000, 6100, 7400, 7500])

## Perform the Mann-Whitney U test

## 1. Manual calculation of the U statistic (the p-value is computed with SciPy in part 2)

In [134]:
# Pool both samples, keeping x1's observations first so the ranks can be
# split back by position afterwards.
combined_sample = np.concatenate([x1, x2])

# Rank the pooled observations (rankdata assigns tied values their average rank).
ranks = stats.rankdata(combined_sample)

# The first len(x1) ranks belong to x1, the remainder to x2.
ranks1, ranks2 = np.split(ranks, [len(x1)])

# U for each sample: its rank sum minus the smallest rank sum it could have,
# n * (n + 1) / 2.
U1 = ranks1.sum() - len(x1) * (len(x1) + 1) / 2
U2 = ranks2.sum() - len(x2) * (len(x2) + 1) / 2

# The test statistic is the smaller of the two U values.
U = U2 if U2 < U1 else U1

print('U-Statistics is: %.3f' % U)

U-Statistics is: 8.000


## 2. Using SciPy to compute the U statistic and p-value for the two samples

In [135]:
# One-sided Mann-Whitney U test; alternative='less' tests whether the
# distribution underlying x1 is stochastically less than that of x2.
u_statistic, p_value = stats.mannwhitneyu(
    x1,
    x2,
    alternative='less',
)
print('U-Statistics is: %.3f, P-value=%.3f' % (u_statistic, p_value))

U-Statistics is: 8.000, P-value=0.019


# t-Test

## 1. Calculate t-test manually

In [136]:
# Sample sizes and sample means.
n1, n2 = len(x1), len(x2)
mean1, mean2 = x1.mean(), x2.mean()

# Unbiased sample variances (ddof=1 divides by n - 1).
var1 = x1.var(ddof=1)
var2 = x2.var(ddof=1)

# Pooled variance: the two sample variances weighted by their degrees of
# freedom. This is the equal-variance (Student) form of the two-sample test.
pooled_var = (
    ((n1 - 1) * var1 + (n2 - 1) * var2)
    / (n1 + n2 - 2)
)

# Pooled standard deviation.
pooled_std = np.sqrt(pooled_var)

# t-statistic: difference in means divided by its standard error.
t = (mean1 - mean2) / (pooled_std * np.sqrt(1 / n1 + 1 / n2))

print('t-Statistics is: %.3f' % t)

t-Statistics is: -2.298


### p-value given the t-statistic

In [138]:
# Left-tailed p-value: P(T <= t) for a Student t distribution with
# n1 + n2 - 2 degrees of freedom. The CDF gives this lower-tail probability
# directly, so `p_value` holds the p-value itself rather than the upper-tail
# survival probability, and no `1 - ...` correction is needed when printing.
p_value = stats.t.cdf(t, df=n1 + n2 - 2)
print('p-value is: %.3f' % p_value)

p-value is: 0.020


### Critical value for the t-test given alpha and the degrees of freedom

In [139]:
# t_{alpha, df}: the 0.05 quantile of the t distribution with
# n1 + n2 - 2 degrees of freedom (lower-tail critical value).
critical_value = stats.t.ppf(0.05, df=n1 + n2 - 2)
print('Critical value is: %.3f' % critical_value)

Critical value is: -1.782


## 2. Using SciPy to compute the t-statistic and p-value for the two samples

In [132]:
# One-sided two-sample t-test. equal_var=True (SciPy's default, spelled out
# here) selects the pooled-variance form of the test.
statistic, p_value = stats.ttest_ind(
    x1,
    x2,
    equal_var=True,
    alternative='less',
)
print('t-Statistics is: %.3f, P-value=%.3f' % (statistic, p_value))

t-Statistics is: -2.298, P-value=0.020
