In [28]:
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import norm, shapiro, bartlett, ttest_1samp, ttest_ind, ttest_rel

<b>1. Additional parameters of ttest_ind function</b>

In [29]:
# Draw both samples from a locally seeded generator so the statistics printed
# below are the same on every "Restart & Run All". A dedicated RandomState is
# used instead of np.random.seed() so NumPy's global random state is untouched.
rng = np.random.RandomState(42)
sample_1 = rng.normal(loc=34, scale=4, size=100)
sample_2 = rng.normal(loc=41, scale=5, size=100)

# Significance level; defined here but not used in this cell's output.
alpha = 0.05

# Baseline: ttest_ind called with only the two samples.
stat, p = ttest_ind(sample_1, sample_2)

print('=== t-Test without additional parameters ===\n',
      't-statistic value: \t {0}\n'.format(stat),
      'p-value: \t\t {0}'.format(p))

# Same samples, but with equal_var=False, i.e. without assuming that both
# populations share one variance (see the SciPy ttest_ind documentation).
stat, p = ttest_ind(sample_1, sample_2, equal_var=False)

print('\n=== t-Test without equal variance assumption ===\n',
      't-statistic value: \t {0}\n'.format(stat),
      'p-value: \t\t {0}'.format(p))

# TO-DO: check axis and nan-policy params

=== t-Test without additional parameters ===
 t-statistic value: 	 -12.02611060801656
 p-value: 		 2.2903172008933877e-25

=== t-Test without equal variance assumption ===
 t-statistic value: 	 -12.02611060801656
 p-value: 		 7.84892793990331e-25


<b>2. t-Test for two independent samples (nervous vs. calm people)</b>

In [49]:
# checking if t-test can be conducted
def _report_decision(p, alpha):
    """Print the reject / can't-reject verdict for ``p`` at level ``alpha``.

    Returns True when the null hypothesis is NOT rejected (``p > alpha``),
    False otherwise.
    """
    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
        return False
    print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')
    return True


def check_ttest_assumptions(sample_1, sample_2, alpha=0.05):
    """Print the assumption checks for a t-test on two independent samples.

    Three checks are run and reported on stdout, in this order:

    1. Shapiro-Wilk normality test on each sample.
    2. Sample-size balance: neither sample may have at least twice as many
       observations as the other.
    3. Bartlett test for similarity of the two variances.

    Parameters
    ----------
    sample_1, sample_2 : sequence of numbers
        The two samples to be compared.
    alpha : float, default 0.05
        Significance level used for the Shapiro-Wilk and Bartlett decisions.

    Returns
    -------
    dict
        Boolean outcome per check, True meaning "no evidence against the
        assumption": keys ``'normal_1'``, ``'normal_2'``, ``'balanced_sizes'``
        and ``'equal_variances'``.
    """
    outcome = {}

    # check if both samples come from gaussian distribution (Shapiro-Wilk test)
    for label, key, sample in (('first', 'normal_1', sample_1),
                               ('second', 'normal_2', sample_2)):
        _, p = shapiro(sorted(sample))
        print('\n=== Shapiro-Wilk normality test for the {0} sample ==='.format(label),
              '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))
        outcome[key] = _report_decision(p, alpha)

    # check if one sample doesn't have twice as much observations as the other one
    print('\n=== Check if one sample doesn\'t have twica as much observations as the other one ===')

    size_1, size_2 = len(sample_1), len(sample_2)
    if size_1 >= 2*size_2:
        print('Result: \t\t The first sample has at least twice as many observations as the second one')
        outcome['balanced_sizes'] = False
    elif size_2 >= 2*size_1:
        print('Result: \t\t The second sample has at least twice as many observations as the first one')
        outcome['balanced_sizes'] = False
    else:
        print('Result: \t\t Both samples have an acceptable number of observations.')
        outcome['balanced_sizes'] = True

    # check if the variance in both samples is simmilar enough (Bartlett test)
    _, p = bartlett(sample_1, sample_2)

    print('\n=== Bartlett test for the simmilarity of variances in both samples ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))
    outcome['equal_variances'] = _report_decision(p, alpha)

    return outcome

def count_degrees_of_freedom(sample_1, sample_2):
    """Print and return the degrees of freedom for a two-sample t-test.

    The value is ``len(sample_1) + len(sample_2) - 2``. It is computed from
    the arguments themselves rather than from notebook-level variables, so
    the function works for any pair of samples and on a fresh kernel.

    Parameters
    ----------
    sample_1, sample_2 : sized collections
        The two samples being compared.

    Returns
    -------
    int
        The degrees of freedom that were printed.
    """
    size_1, size_2 = len(sample_1), len(sample_2)
    degrees_of_freedom = size_1 + size_2 - 2
    print('\nDegrees of freedom: \t {0} + {1} - 2 = {2}'.format(size_1, size_2, degrees_of_freedom))
    return degrees_of_freedom
    
# Observations for the two independent groups. Judging by the hypothesis
# printed below these are presumably the nervous and the calm participants.
nerwowi, spokojni = [3, 3, 4, 5, 5], [4, 6, 7, 9, 9]
alpha = 0.05

# Report the test assumptions before running the test itself.
check_ttest_assumptions(nerwowi, spokojni)

# Research hypothesis followed by the null hypothesis (the H0 text is Polish).
print('\nHypothesis: \t\t Nervous people have much more energy than steady people.')
print('H0: \t\t\t Średnia wykonanych ruchów w próbie ludzi nerwowych nie jest \n'
      '\t\t\t istotnie różna od średniej wykonanych ruchów w próbie ludzi\n'
      '\t\t\t spokojnych.')

count_degrees_of_freedom(nerwowi, spokojni)

# t-test for independent samples
stat, p = ttest_ind(nerwowi, spokojni)

print('\n=== t-Test results (two-sided)=== \np-value:\t\t {0}\nalpha:\t\t\t {1}'.format(p, alpha))

# Decision at the chosen significance level.
print('\nResult:\t\t\t p-value is less or equal to alpha - we reject null hypothesis'
      if p <= alpha else
      "\nResult:\t\t\t p-value is greater than alpha - we  can't reject null hypothesis")

# TO-DO: right-sided t-Test for independent groups and complete conclusion


=== Shapiro-Wilk normality test for the first sample === 
p-value: 		 0.11850986629724503
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

=== Shapiro-Wilk normality test for the second sample === 
p-value: 		 0.4677391052246094
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

=== Check if one sample doesn't have twica as much observations as the other one ===
Result: 		 Both samples have an acceptable number of observations.

=== Bartlett test for the simmilarity of variances in both samples === 
p-value: 		 0.17427550130420436
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

Hypothesis: 		 Nervous people have much more energy than steady people.
H0: 			 Średnia wykonanych ruchów w próbie ludzi nerwowych nie jest 
			 istotnie różna od średniej wykonanych ruchów w próbie ludzi
			 spokojnych.

Degrees of freedom: 	 5 + 5 - 2 = 8

=== t-Test r

<b>3. t-Test for two independent samples (people under 30 vs. over 30)</b>

In [65]:
# checking if t-test can be conducted
# NOTE: this cell re-defines a function that the section-2 cell also defines;
# the later definition shadows the earlier one, so keep the two in sync.
def _report_decision(p, alpha):
    """Print the reject / can't-reject verdict for ``p`` at level ``alpha``.

    Returns True when the null hypothesis is NOT rejected (``p > alpha``),
    False otherwise.
    """
    if p <= alpha:
        print('Result: \t\t p-value is smaller than or equal to alpha \n \t\t\t We reject null hypothesis')
        return False
    print('Result: \t\t p-value is greater than alpha \n \t\t\t We can\'t reject null hypothesis')
    return True


def check_ttest_assumptions(sample_1, sample_2, alpha=0.05):
    """Print the assumption checks for a t-test on two independent samples.

    Three checks are run and reported on stdout, in this order:

    1. Shapiro-Wilk normality test on each sample.
    2. Sample-size balance: neither sample may have at least twice as many
       observations as the other.
    3. Bartlett test for similarity of the two variances.

    Parameters
    ----------
    sample_1, sample_2 : sequence of numbers
        The two samples to be compared.
    alpha : float, default 0.05
        Significance level used for the Shapiro-Wilk and Bartlett decisions.

    Returns
    -------
    dict
        Boolean outcome per check, True meaning "no evidence against the
        assumption": keys ``'normal_1'``, ``'normal_2'``, ``'balanced_sizes'``
        and ``'equal_variances'``.
    """
    outcome = {}

    # check if both samples come from gaussian distribution (Shapiro-Wilk test)
    for label, key, sample in (('first', 'normal_1', sample_1),
                               ('second', 'normal_2', sample_2)):
        _, p = shapiro(sorted(sample))
        print('\n=== Shapiro-Wilk normality test for the {0} sample ==='.format(label),
              '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))
        outcome[key] = _report_decision(p, alpha)

    # check if one sample doesn't have twice as much observations as the other one
    print('\n=== Check if one sample doesn\'t have twica as much observations as the other one ===')

    size_1, size_2 = len(sample_1), len(sample_2)
    if size_1 >= 2*size_2:
        print('Result: \t\t The first sample has at least twice as many observations as the second one')
        outcome['balanced_sizes'] = False
    elif size_2 >= 2*size_1:
        print('Result: \t\t The second sample has at least twice as many observations as the first one')
        outcome['balanced_sizes'] = False
    else:
        print('Result: \t\t Both samples have an acceptable number of observations.')
        outcome['balanced_sizes'] = True

    # check if the variance in both samples is simmilar enough (Bartlett test)
    _, p = bartlett(sample_1, sample_2)

    print('\n=== Bartlett test for the simmilarity of variances in both samples ===',
          '\np-value: \t\t {0}\nalpha: \t\t\t {1}\n'.format(p, alpha))
    outcome['equal_variances'] = _report_decision(p, alpha)

    return outcome

def count_degrees_of_freedom(sample_1, sample_2):
    """Print and return the degrees of freedom for a two-sample t-test.

    The value is ``len(sample_1) + len(sample_2) - 2``. It is computed from
    the arguments themselves rather than from the notebook-level lists
    ``nerwowi`` / ``spokojni``, so the samples passed in by this cell are
    the ones actually reported.

    Parameters
    ----------
    sample_1, sample_2 : sized collections
        The two samples being compared.

    Returns
    -------
    int
        The degrees of freedom that were printed.
    """
    size_1, size_2 = len(sample_1), len(sample_2)
    degrees_of_freedom = size_1 + size_2 - 2
    print('\nDegrees of freedom: \t {0} + {1} - 2 = {2}'.format(size_1, size_2, degrees_of_freedom))
    return degrees_of_freedom
    
# Scores for the two independent groups. Judging by the hypothesis printed
# below these are presumably people under 30 and people over 30.
mniej30, po30 = [6, 7, 10, 9], [5, 6, 2, 3]
alpha = 0.05

# Report the test assumptions before running the test itself.
check_ttest_assumptions(mniej30, po30)

# Research hypothesis followed by the null hypothesis (the H0 text is Polish).
print('\nHypothesis: \t\t People younger than 30 years are more funny than '
      '\n\t\t\t people older than 30 years\n')
print(' '.join((
    'H0: \t\t\t Średnia wartość współczynnika Liberta w próbie ludzi',
    '\n\t\t\t poniżej 30 roku życia jest równa średniej wartości',
    '\n\t\t\t współczynnika Liberta w próbie ludzi powyżej',
    '\n\t\t\t 30 roku życia',
)))

count_degrees_of_freedom(mniej30, po30)

# t-test for independent samples
stat, p = ttest_ind(mniej30, po30)

print('\n=== t-Test results === \np-value:\t\t {0}\nalpha:\t\t\t {1}'.format(p, alpha))

# Decision at the chosen significance level.
print('\nResult:\t\t\t p-value is less or equal to alpha - we reject null hypothesis'
      if p <= alpha else
      "\nResult:\t\t\t p-value is greater than alpha - we  can't reject null hypothesis")

# TO-DO: right-sided t-Test for independent groups and complete conclusion


=== Shapiro-Wilk normality test for the first sample === 
p-value: 		 0.714280366897583
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

=== Shapiro-Wilk normality test for the second sample === 
p-value: 		 0.714280366897583
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

=== Check if one sample doesn't have twica as much observations as the other one ===
Result: 		 Both samples have an acceptable number of observations.

=== Bartlett test for the simmilarity of variances in both samples === 
p-value: 		 1.0
alpha: 			 0.05

Result: 		 p-value is greater than alpha 
 			 We can't reject null hypothesis

Hypothesis: 		 People younger than 30 years are more funny than 
			 people older than 30 years

H0: 			 Średnia wartość współczynnika Liberta w próbie ludzi 
			 poniżej 30 roku życia jest równa średniej wartości 
			 współczynnika Liberta w próbie ludzi powyżej 
			 30 roku życia

Degree

<b>4. t-Test for two related samples</b>