In [1]:
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm, shapiro, bartlett, ttest_1samp, ttest_ind, ttest_rel, mannwhitneyu

<b>1. Additional parameters of ttest_ind function</b>

In [2]:
sample_1 = np.random.normal(loc=34, scale=4, size=100)
sample_2 = np.random.normal(loc=41, scale=5, size=80)

stat, p = ttest_ind(sample_1, sample_2)

alpha = 0.05

print('=== t-Test without additional parameters ===\n',
     't-statistic value: \t {0}\n'.format(stat),
     'p-value: \t\t {0}'.format(p))

stat, p = ttest_ind(sample_1, sample_2, equal_var=False)

print('\n=== t-Test without equal variance assumption ===\n',
     't-statistic value: \t {0}\n'.format(stat),
     'p-value: \t\t {0}'.format(p))

sample_3 = np.random.normal(loc=52, scale=7, size=(50, 5))
sample_4 = np.random.normal(loc=42, scale=10, size=(50, 5))

stat, p = ttest_ind(sample_3, sample_4)

print('\n=== t-Test with two-dimensional samples and not specified axis ===\n',
     't-statistic value: \t {0}\n'.format(stat),
     'p-value: \t\t {0}'.format(p))

stat, p = ttest_ind(sample_3, sample_4, axis=1)

print('\n=== t-Test with two-dimensional samples and specified axis ===\n',
     't-statistic value: \t {0}\n'.format(stat),
     'p-value: \t\t {0}'.format(p))

sample_1[0] = float('NaN')
stat, p = ttest_ind(sample_1, sample_2)

print('\n=== t-Test with nan value in one sample ===\n',
     't-statistic value: \t {0}\n'.format(stat),
     'p-value: \t\t {0}'.format(p))

stat, p = ttest_ind(sample_1, sample_2, nan_policy='omit')

print('\n=== t-Test with nan value in one sample and \'omit\' assumption ===\n',
     't-statistic value: \t {0}\n'.format(stat),
     'p-value: \t\t {0}'.format(p))

=== t-Test without additional parameters ===
 t-statistic value: 	 -9.889600380551899
 p-value: 		 1.1772213250507584e-18

=== t-Test without equal variance assumption ===
 t-statistic value: 	 -9.731156208147805
 p-value: 		 8.35099406562247e-18

=== t-Test with two-dimensional samples and not specified axis ===
 t-statistic value: 	 [7.44503121 6.81329957 4.7249701  7.68209464 5.4193892 ]
 p-value: 		 [3.79854208e-11 7.78256392e-10 7.69530287e-06 1.20167797e-11
 4.28708484e-07]

=== t-Test with two-dimensional samples and specified axis ===
 t-statistic value: 	 [ 1.519499    2.83753426  1.08017939  2.6531983   2.33032116  2.58500032
  2.57895809  3.15831888  1.58792869  0.39874818  2.44330383  3.49460108
  2.56592312  0.23712041  0.8656111   6.46733232  0.96731488  0.44144061
  6.81264025  1.618601    1.94073091  1.37595891  1.51621568  2.88252521
  1.92089232  1.52811315  1.59409108 -0.26822385  3.53346666  4.03456241
  2.41950205  1.45625912  2.64215403  0.77928829  1.12827353  1.

  return (a < x) & (x < b)
  return (a < x) & (x < b)
  cond2 = cond0 & (x <= _a)


<b>2. t-Test of two independent samples</b>

In [3]:
# checking if t-test can be conducted
def check_ttest_assumptions(sample_1, sample_2, alpha=0.05):
    
    # check if both samples come from gaussian distribution (Shapiro-Wilk test)
    stat, p = shapiro(sorted(sample_1))

    alpha = alpha

    print('\n=== Shapiro-Wilk normality test for the first sample ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))

    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
    else:
        print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')    

    stat, p = shapiro(sorted(sample_2))

    print('\n=== Shapiro-Wilk normality test for the second sample ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))

    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
    else:
        print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')    

    # check if one sample doesn't have twice as much observations as the other one
    print('\n=== Check if one sample doesn\'t have twica as much observations as the other one ===')
    
    if len(sample_1) >= 2*len(sample_2):
        print('Result: \t\t The first sample has at least twice as many observations as the second one')
    elif len(sample_2) >= 2*len(sample_1):
        print('Result: \t\t The second sample has at least twice as many observations as the first one')
    else:
        print('Result: \t\t Both samples have an acceptable number of observations.')
        
    # check if the variance in both samples is simmilar enough (Bartlett test)
    stat, p = bartlett(sample_1, sample_2)
    
    print('\n=== Bartlett test for the simmilarity of variances in both samples ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))

    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
    else:
        print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis') 

def count_degrees_of_freedom(sample_1, sample_2):
    degrees_of_freedom = len(sample_1) + len(sample_2) - 2
    print('\nDegrees of freedom: \t {0} + {1} - 2 = {2}'.format(len(sample_1), len(sample_2), degrees_of_freedom))
    
nerwowi = [3, 3, 4, 5, 5]
spokojni = [4, 6, 7, 9, 9]

check_ttest_assumptions(nerwowi, spokojni)

print('\nHypothesis: \t\t Nervous people have much more energy than steady people.')
print('H0: \t\t\t Średnia wykonanych ruchów w próbie ludzi nerwowych nie jest \n\t\t\t istotnie różna od średniej wykonanych ruchów w próbie ludzi\n\t\t\t spokojnych.')

count_degrees_of_freedom(nerwowi, spokojni)

# t-test for independent samples
stat, p = ttest_ind(nerwowi, spokojni)

alpha = 0.05

print('\n=== Two-sided t-Test results ===', 
      '\np-value:\t\t {0}\nalpha:\t\t\t {1}'.format(p, alpha))

if p <= alpha:
    print('\nResult:\t\t\t p-value is less or equal to alpha - we reject null hypothesis')
else:
    print('\nResult:\t\t\t p-value is greater than alpha - we  can\'t reject null hypothesis')
    
# one-sided t-Test for independent groups
p_one_sided = p/2

alpha = 0.05

print('\n=== One-sided t-Test results ===', 
      '\np-value:\t\t {0}\nalpha:\t\t\t {1}'.format(p_one_sided, alpha))

if p_one_sided <= alpha:
    print('\nResult:\t\t\t p-value is less or equal to alpha - we reject null hypothesis')
else:
    print('\nResult:\t\t\t p-value is greater than alpha - we  can\'t reject null hypothesis')


=== Shapiro-Wilk normality test for the first sample === 
p-value: 		 0.11850986629724503
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

=== Shapiro-Wilk normality test for the second sample === 
p-value: 		 0.4677391052246094
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

=== Check if one sample doesn't have twica as much observations as the other one ===
Result: 		 Both samples have an acceptable number of observations.

=== Bartlett test for the simmilarity of variances in both samples === 
p-value: 		 0.17427550130420436
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

Hypothesis: 		 Nervous people have much more energy than steady people.
H0: 			 Średnia wykonanych ruchów w próbie ludzi nerwowych nie jest 
			 istotnie różna od średniej wykonanych ruchów w próbie ludzi
			 spokojnych.

Degrees of freedom: 	 5 + 5 - 2 = 8

=== Two-side

Complete Conclusions: Both samples <b>come from gaussian distribution</b>. Both samples <b>have an acceptable number of observations</b>. Both samples <b>have similar variance</b>. T statistic <b>has 8 degrees of freedom</b>. We reject null hypothesis - <b>samples come from different distributions</b>.

<b>3. t-Test for two independent samples</b>

In [4]:
# checking if t-test can be conducted
def check_ttest_assumptions(sample_1, sample_2, alpha=0.05):
    
    # check if both samples come from gaussian distribution (Shapiro-Wilk test)
    stat, p = shapiro(sorted(sample_1))

    alpha = alpha

    print('\n=== Shapiro-Wilk normality test for the first sample ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))

    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
    else:
        print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')    

    stat, p = shapiro(sorted(sample_2))

    print('\n=== Shapiro-Wilk normality test for the second sample ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))

    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
    else:
        print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')    

    # check if one sample doesn't have twice as much observations as the other one
    print('\n=== Check if one sample doesn\'t have twica as much observations as the other one ===')
    
    if len(sample_1) >= 2*len(sample_2):
        print('Result: \t\t The first sample has at least twice as many observations as the second one')
    elif len(sample_2) >= 2*len(sample_1):
        print('Result: \t\t The second sample has at least twice as many observations as the first one')
    else:
        print('Result: \t\t Both samples have an acceptable number of observations.')
        
    # check if the variance in both samples is simmilar enough (Bartlett test)
    stat, p = bartlett(sample_1, sample_2)
    
    print('\n=== Bartlett test for the simmilarity of variances in both samples ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))

    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
    else:
        print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis') 

def count_degrees_of_freedom(sample_1, sample_2):
    degrees_of_freedom = len(sample_1) + len(sample_2) - 2
    print('\nDegrees of freedom: \t {0} + {1} - 2 = {2}'.format(len(sample_1), len(sample_2), degrees_of_freedom))
    
mniej30 = [6, 7, 10, 9]
po30 = [5, 6, 2, 3]

check_ttest_assumptions(mniej30, po30)

print('\nHypothesis: \t\t People younger than 30 years are more funny than',
      '\n\t\t\t people older than 30 years\n')
print('H0: \t\t\t Średnia wartość współczynnika Liberta w próbie ludzi',
      '\n\t\t\t poniżej 30 roku życia jest równa średniej wartości',
      '\n\t\t\t współczynnika Liberta w próbie ludzi powyżej',
      '\n\t\t\t 30 roku życia')

count_degrees_of_freedom(mniej30, po30)

# two-sided t-test for independent samples
stat, p = ttest_ind(mniej30, po30)

alpha = 0.05

print('\n=== Two-sided t-Test results ===', 
      '\np-value:\t\t {0}\nalpha:\t\t\t {1}'.format(p, alpha))

if p <= alpha:
    print('\nResult:\t\t\t p-value is less or equal to alpha - we reject null hypothesis')
else:
    print('\nResult:\t\t\t p-value is greater than alpha - we  can\'t reject null hypothesis')
    
# one-sided t-Test for independent groups
p_one_sided = p/2

alpha = 0.05

print('\n=== One-sided t-Test results ===', 
      '\np-value:\t\t {0}\nalpha:\t\t\t {1}'.format(p_one_sided, alpha))

if p_one_sided <= alpha:
    print('\nResult:\t\t\t p-value is less or equal to alpha - we reject null hypothesis')
else:
    print('\nResult:\t\t\t p-value is greater than alpha - we  can\'t reject null hypothesis')



=== Shapiro-Wilk normality test for the first sample === 
p-value: 		 0.714280366897583
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

=== Shapiro-Wilk normality test for the second sample === 
p-value: 		 0.714280366897583
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

=== Check if one sample doesn't have twica as much observations as the other one ===
Result: 		 Both samples have an acceptable number of observations.

=== Bartlett test for the simmilarity of variances in both samples === 
p-value: 		 1.0
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

Hypothesis: 		 People younger than 30 years are more funny than 
			 people older than 30 years

H0: 			 Średnia wartość współczynnika Liberta w próbie ludzi 
			 poniżej 30 roku życia jest równa średniej wartości 
			 współczynnika Liberta w próbie ludzi powyżej 
			 30 roku życia

Degree

Complete Conclusions: Both samples <b>come from gaussian distribution</b>. Both samples <b>have an acceptable number of observations</b>. Both samples <b>have similar variance</b>. T statistic <b>has 6 degrees of freedom</b>. We reject null hypothesis - <b>samples come from different distributions</b>.

<b>4. t-Test for two "related" samples (salary of male and female graduates)</b>

In [5]:
# checking if t-test can be conducted
def check_ttest_assumptions(sample_1, sample_2, alpha=0.05):
    
    # check if both samples come from gaussian distribution (Shapiro-Wilk test)
    stat, p = shapiro(sorted(sample_1))

    alpha = alpha

    print('\n=== Shapiro-Wilk normality test for the first sample ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))

    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
    else:
        print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')    

    stat, p = shapiro(sorted(sample_2))

    print('\n=== Shapiro-Wilk normality test for the second sample ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))

    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
    else:
        print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')    

    # check if one sample doesn't have twice as much observations as the other one
    print('\n=== Check if one sample doesn\'t have twica as much observations as the other one ===')
    
    if len(sample_1) >= 2*len(sample_2):
        print('Result: \t\t The first sample has at least twice as many observations as the second one')
    elif len(sample_2) >= 2*len(sample_1):
        print('Result: \t\t The second sample has at least twice as many observations as the first one')
    else:
        print('Result: \t\t Both samples have an acceptable number of observations.')
        
    # check if the variance in both samples is simmilar enough (Bartlett test)
    stat, p = bartlett(sample_1, sample_2)
    
    print('\n=== Bartlett test for the simmilarity of variances in both samples ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))

    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
    else:
        print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis') 

df = pd.read_csv(os.path.join('dane_listy_3_-_5', 'absolwenci.csv'), sep=';', engine='python')

check_ttest_assumptions(df[df['GENDER'] == 'Mezczyzna']['SALARY'], df[df['GENDER'] == 'Kobieta']['SALARY'])
df.head()


=== Shapiro-Wilk normality test for the first sample === 
p-value: 		 5.960432591933795e-09
alpha: 			 0.05

Result: 		 p-value is smaller than or equal to alpha 
 			 We reject null hypothesis

=== Shapiro-Wilk normality test for the second sample === 
p-value: 		 6.71039833832765e-06
alpha: 			 0.05

Result: 		 p-value is smaller than or equal to alpha 
 			 We reject null hypothesis

=== Check if one sample doesn't have twica as much observations as the other one ===
Result: 		 Both samples have an acceptable number of observations.

=== Bartlett test for the simmilarity of variances in both samples === 
p-value: 		 0.9311518203259435
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis


Unnamed: 0,GRADUATE,GENDER,COLLEGE,SALARY,DEGREE,GRADDATE
0,1,Mezczyzna,Inzynieria,28900,Licencjat,Jesien 89
1,2,Mezczyzna,Inzynieria,28000,Licencjat,Jesien 89
2,3,Mezczyzna,Rolnictwo,27500,Licencjat,Jesien 89
3,4,Mezczyzna,Inzynieria,30300,Licencjat,Jesien 89
4,5,Mezczyzna,Rolnictwo,18000,Licencjat,Jesien 89


Result: <b>We can't conduct t-Test for two related samples</b>, because those samples have data about salaries of <b>two different group of people</b>. In addition both samples <b>don't come from gaussian distribution</b>.<br/><br/>
Solution: We would need data about graduates' salary <b>before they started studies</b> or their salary <b>5 years after graduation</b>.

<b>5. One sample t-Test</b>

In [6]:
# checking if one sample t-test can be conducted
def check_ttest_one_sample_assumptions(sample, alpha=0.05):
    
    # check if sample comes from gaussian distribution (Shapiro-Wilk test)
    stat, p = shapiro(sorted(sample))

    alpha = alpha

    print('\n=== Shapiro-Wilk normality test for the sample ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))

    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
    else:
        print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis') 
    
    print('\nSample mean: \t\t {0}\nSample variance: \t {1}\nSample std dev: \t {2}\n'.format(np.mean(sample),np.var(sample), np.std(sample)))


data13 = [175.26, 177.8, 167.64000000000001, 160.02, 172.72, 177.8, 175.26,
          170.18, 157.48, 160.02, 193.04, 149.86, 157.48, 157.48, 190.5,
          157.48, 182.88, 160.02]

check_ttest_one_sample_assumptions(data13)

hypothetic_mean = 169.051

stat, p = ttest_1samp(data13, hypothetic_mean)

alpha = 0.05

print('=== One sample t-Test results ===',
      '\nHypothetic mean: \t {0}'.format(hypothetic_mean),
      '\nt-statistic value: \t {0} \np-value: \t\t {1} \nalpha: \t\t\t {2}\n'
      .format(stat, p, alpha))

if p <= alpha:
    print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
else:
    print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')


=== Shapiro-Wilk normality test for the sample === 
p-value: 		 0.21162712574005127
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

Sample mean: 		 169.0511111111111
Sample variance: 	 145.67872098765432
Sample std dev: 	 12.069744031571437

=== One sample t-Test results === 
Hypothetic mean: 	 169.051 
t-statistic value: 	 3.795630181939122e-05 
p-value: 		 0.9999701571059357 
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis


<b>6. One sample t-Test for sample containing height of students</b>

In [7]:
# checking if one sample t-test can be conducted
def check_ttest_one_sample_assumptions(sample, alpha=0.05):
    
    # check if sample comes from gaussian distribution (Shapiro-Wilk test)
    stat, p = shapiro(sorted(sample))

    alpha = alpha

    print('\n=== Shapiro-Wilk normality test for the sample ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))

    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
    else:
        print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis') 
    
    print('\nSample mean: \t\t {0}\nSample variance: \t {1}\nSample std dev: \t {2}\n'.format(np.mean(sample),np.var(sample), np.std(sample)))


data17 = [172.72, 157.48, 170.18, 172.72, 175.26, 170.18, 154.94, 149.86,
          157.48, 154.94, 175.26, 167.64000000000001, 157.48, 157.48, 154.94,
          177.8]

check_ttest_one_sample_assumptions(data17)

hypothetic_mean = 164.1475

stat, p = ttest_1samp(data17, hypothetic_mean)

alpha = 0.05

print('=== One sample t-Test results ===',
      '\nHypothetic mean: \t {0}'.format(hypothetic_mean),
      '\nt-statistic value: \t {0} \np-value: \t\t {1} \nalpha: \t\t\t {2}\n'
      .format(stat, p, alpha))

if p <= alpha:
    print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
else:
    print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')


=== Shapiro-Wilk normality test for the sample === 
p-value: 		 0.036915361881256104
alpha: 			 0.05

Result: 		 p-value is smaller than or equal to alpha 
 			 We reject null hypothesis

Sample mean: 		 164.14749999999998
Sample variance: 	 81.35064375000002
Sample std dev: 	 9.019459171702039

=== One sample t-Test results === 
Hypothetic mean: 	 164.1475 
t-statistic value: 	 -1.2204368931571835e-14 
p-value: 		 0.9999999999999905 
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis


<b>7. Mann-Whitney U-test for two samples</b>

In [8]:
# checking normality of two samples
def check_normality_two_samples(sample_1, sample_2, alpha=0.05):
    
    # check if both samples come from gaussian distribution (Shapiro-Wilk test)
    stat, p = shapiro(sorted(sample_1))

    alpha = alpha

    print('\n=== Shapiro-Wilk normality test for the first sample ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))

    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
    else:
        print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')    

    stat, p = shapiro(sorted(sample_2))

    print('\n=== Shapiro-Wilk normality test for the second sample ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))

    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
    else:
        print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')    

nerwowi = [3, 3, 4, 5, 5]
spokojni = [4, 6, 7, 9, 9]

check_normality_two_samples(nerwowi, spokojni)

print('\nMedian difference: \t {0}'.format(np.abs(np.median(nerwowi)-np.median(spokojni))),
     '\nMean difference: \t {0}'.format(np.abs(np.mean(nerwowi)-np.mean(spokojni))))

print('\nHypothesis: \t\t Nervous people have much more energy than steady people.')
print('H0: \t\t\t Średnia wykonanych ruchów w próbie ludzi nerwowych nie jest \n\t\t\t istotnie różna od średniej wykonanych ruchów w próbie ludzi\n\t\t\t spokojnych.')

stat, p = mannwhitneyu(nerwowi, spokojni, alternative='two-sided')

alpha = 0.05

print('\n=== Two-sided Mann-Whitney U-test results ===',
      '\nu-statistic value: \t {0} \np-value: \t\t {1} \nalpha: \t\t\t {2}\n'
      .format(stat, p, alpha))

if p <= alpha:
    print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
else:
    print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')
    
stat, p = mannwhitneyu(nerwowi, spokojni, alternative='greater')

alpha = 0.05

print('\n=== Right-sided Mann-Whitney U-test results ===',
      '\nu-statistic value: \t {0} \np-value: \t\t {1} \nalpha: \t\t\t {2}\n'
      .format(stat, p, alpha))

if p <= alpha:
    print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
else:
    print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')


=== Shapiro-Wilk normality test for the first sample === 
p-value: 		 0.11850986629724503
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

=== Shapiro-Wilk normality test for the second sample === 
p-value: 		 0.4677391052246094
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

Median difference: 	 3.0 
Mean difference: 	 3.0

Hypothesis: 		 Nervous people have much more energy than steady people.
H0: 			 Średnia wykonanych ruchów w próbie ludzi nerwowych nie jest 
			 istotnie różna od średniej wykonanych ruchów w próbie ludzi
			 spokojnych.

=== Two-sided Mann-Whitney U-test results === 
u-statistic value: 	 2.5 
p-value: 		 0.04453861592762467 
alpha: 			 0.05

Result: 		 p-value is smaller than or equal to alpha 
 			 We reject null hypothesis

=== Right-sided Mann-Whitney U-test results === 
u-statistic value: 	 2.5 
p-value: 		 0.986806161200152 
alpha: 			 0.05

Result: 		 p-value is

Complete conclusion: We <b>can use Mann-Whitney U-test</b>. Both samples <b>don't come from the same distribution</b>. Nervous people gesture count sample mean <b>isn't greater than</b> steady people gesture count sample mean.

<b>8. Mann-Whitney U-test for two samples containing height of the students</b>

In [9]:
# checking normality of two samples
def check_normality_two_samples(sample_1, sample_2, alpha=0.05):
    
    # check if both samples come from gaussian distribution (Shapiro-Wilk test)
    stat, p = shapiro(sorted(sample_1))

    alpha = alpha

    print('\n=== Shapiro-Wilk normality test for the first sample ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))

    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
    else:
        print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')    

    stat, p = shapiro(sorted(sample_2))

    print('\n=== Shapiro-Wilk normality test for the second sample ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))

    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
    else:
        print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')    

data13 = [175.26, 177.8, 167.64, 160.02, 172.72, 177.8, 175.26, 170.18, 157.48,
          160.02, 193.04, 149.86, 157.48, 157.48, 190.5, 157.48, 182.88, 160.02]

data17 = [172.72, 157.48, 170.18, 172.72, 175.26, 170.18, 154.94, 149.86,
          157.48, 154.94, 175.26, 167.64, 157.48, 157.48, 154.94, 177.8]

check_normality_two_samples(data13, data17)

print('\nMedian difference: \t {0}'.format(np.median(data13)-np.median(data17)),
     '\nMean difference: \t {0}'.format(np.mean(data13)-np.mean(data17)))

print('\nHypothesis: \t\t The height of first group of students is equal to the height of second group of students.')
print('H0: \t\t\t Średni wzrost w próbie studentów pierwszej grupy zajęciowej nie jest \n\t\t\t istotnie różny od średniego wzrostu w próbie studentów z drugiej grupy zajęciowej.')

stat, p = mannwhitneyu(data13, data17, alternative='two-sided')

alpha = 0.05

print('\n=== Two-sided Mann-Whitney U-test results ===',
      '\nu-statistic value: \t {0} \np-value: \t\t {1} \nalpha: \t\t\t {2}\n'
      .format(stat, p, alpha))

if p <= alpha:
    print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
else:
    print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')
    


=== Shapiro-Wilk normality test for the first sample === 
p-value: 		 0.21162712574005127
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

=== Shapiro-Wilk normality test for the second sample === 
p-value: 		 0.036915361881256104
alpha: 			 0.05

Result: 		 p-value is smaller than or equal to alpha 
 			 We reject null hypothesis

Median difference: 	 6.349999999999994 
Mean difference: 	 4.9036111111111325

Hypothesis: 		 The height of first group of students is equal to the height of second group of students.
H0: 			 Średni wzrost w próbie studentów pierwszej grupy zajęciowej nie jest 
			 istotnie różny od średniego wzrostu w próbie studentów z drugiej grupy zajęciowej.

=== Two-sided Mann-Whitney U-test results === 
u-statistic value: 	 186.0 
p-value: 		 0.14852918271604412 
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis


Complete Conclusion: One group of students <b>doesn't have greater height</b> than the other group of students. The second sample of students' heights <b>doesn't come from gaussian distribution</b>.