In [2]:
import numpy as np
from scipy import stats


In [3]:
# Data from parachute drops
a = np.array([4.4,4.11,4.7,4.64])
b = np.array([3.78,4.1,3.93,4.72,3.56,3.92])
# average number of data points per sample
N = (len(a)+len(b))/2

In [12]:
a_mean = np.mean(a)
b_mean = np.mean(b)
print(f' mean of a = {a_mean}')
print(f' mean of b = {b_mean}')
a_std = np.std(a)
b_std = np.std(b)
print(f' stdev of a = {a_std}')
print(f' stdev of b = {b_std}')

 mean of a = 4.4625
 mean of b = 4.001666666666666
 stdev of a = 0.23241934084752913
 stdev of b = 0.3609439784170883


In [3]:
# Compute the variance
var_a = a.var(ddof=1)
var_b = b.var(ddof=1)


In [4]:
# Compute the standard deviation
s = np.sqrt((var_a + var_b)/2)
print(f'standard deviation = {round(s,2)} s')

standard deviation = 0.34 s


In [5]:
## Calculate the t-statistics
t = (a.mean() - b.mean())/(s*np.sqrt(2/N))
print(f't-value = {round(t,2)} ')

t-value = 2.16 


In [6]:
## Compare with the critical t-value
#Degrees of freedom
df = len(a)+len(b)-2
print(f'degrees of freedom = {round(df,0)} ')

degrees of freedom = 8 


In [7]:
#p-value after comparison with the t 
p = 1 - stats.t.cdf(t,df=df)
print(f'after comparision with t, p-value = {round(p,3)} ')

after comparision with t, p-value = 0.032 


In [8]:
print("t = " + str(t))
print("p = " + str(2*p))
#Note that we multiply the p value by 2 because its a twp tail t-test
### You can see that after comparing the t statistic with the critical t value (computed internally)
### we get a good p value of 0.032 and thus we reject the null hypothesis
### and thus it proves that the mean of the two distributions are different and statistically significant.

t = 2.156340229982052
p = 0.0631484553773829


In [9]:
#based on chart https://towardsdatascience.com/inferential-statistics-series-t-test-using-numpy-2718f8f9bf2f
# t @ 90% confidence and 8 degrees of freedom = 1.397

# if calculated t-value (from the data) is greater than critical t-value (from the table), 
# the two samples are statistically different at a 90% confidence interval

t_crit = 1.397
if t>t_crit:
    print(f'calculated t = {round(t,2)} is greater than critical t = {round(t_crit,3)} ')
    print('The two samples are statistically different from eachother within a 90% confidence interval')
    print(f'p-value of p = {round(2*p,2)} indicates there is a {100*round(2*p,2)} % chance the experiment happend by chance')
else:
    print(f'calculated t={round(t,2)} is smaller than critical t= {round(t_crit)} ')
    print('The two samples are not statistically different from eachother within a 90% confidence interval')

calculated t = 2.16 is greater than critical t = 1.397 
The two samples are statistically different from eachother within a 90% confidence interval
p-value of p = 0.06 indicates there is a 6.0 % chance the experiment happend by chance


In [12]:
# use scipy's built in function to claculate t-value and p-value

t2, p2 = stats.ttest_ind(a,b, equal_var=False)
print(t2)
print(p2)

2.195381512014234
0.059580188807867675
