In [54]:
import scipy.stats as st
import numpy as np
import pandas as pd
import math as mt
import statistics as stat
import statsmodels.stats.weightstats as mod
import statsmodels.stats.proportion as prop
np.random.seed(1)

def t_test(sample, hypoth_value, sig_level, test_type):
    """Run a one-sample Student's t-test, print the outcome, and return the result.

    Parameters
    ----------
    sample : sequence of numbers
        Observations to test. At least two values are required because the
        sample standard deviation (``statistics.stdev``) is used.
    hypoth_value : number
        Mean assumed under the null hypothesis.
    sig_level : float
        Significance level the p-value is compared against (e.g. 0.05).
    test_type : {'lower', 'upper', 'two'}
        Direction of the alternative hypothesis: mean below ``hypoth_value``,
        mean above ``hypoth_value``, or mean different from ``hypoth_value``.

    Returns
    -------
    tuple
        ``(test_statistic, p_value)``.

    Raises
    ------
    ValueError
        If ``test_type`` is not one of ``'lower'``, ``'upper'`` or ``'two'``.
    """
    # Fail early with a clear message instead of hitting an unbound
    # ``p_value`` further down when the tail name is misspelled.
    if test_type not in ('lower', 'upper', 'two'):
        raise ValueError(
            f"test_type must be 'lower', 'upper' or 'two', got {test_type!r}"
        )

    sample_mean = stat.mean(sample)
    sample_st_dev = stat.stdev(sample)
    sample_size = len(sample)
    degrees_of_freedom = sample_size - 1

    # t = (x_bar - mu_0) / (s / sqrt(n))
    standard_error = sample_st_dev / mt.sqrt(sample_size)
    test_statistic = (sample_mean - hypoth_value) / standard_error

    if test_type == 'lower':
        p_value = st.t.cdf(test_statistic, df=degrees_of_freedom)
    elif test_type == 'upper':
        # The survival function (1 - cdf) keeps precision in the far tail,
        # where an explicit ``1 - cdf`` would round to zero.
        p_value = st.t.sf(test_statistic, df=degrees_of_freedom)
    else:
        # Two-sided: double the tail area beyond |t|.
        p_value = 2 * st.t.sf(abs(test_statistic), df=degrees_of_freedom)

    print(f'P Value = {p_value}')
    if p_value < sig_level:
        print('Results are significant.  Reject the Null')
    else:
        print('Results are insignificant.  Do Not Reject the Null')
    return (test_statistic, p_value)

# Draw 100 observations from a normal distribution (mean 50, standard deviation 10)
data1 = np.random.normal(loc=50, scale=10, size=100)

print('large sample')
print(f'Sample mean: {stat.mean(data1)}')

# Two-tailed: does the mean differ significantly from 50?
t_test(data1, hypoth_value=50, sig_level=0.05, test_type='two')
# Lower-tailed: is the mean significantly below 51?
t_test(data1, hypoth_value=51, sig_level=0.05, test_type='lower')
# Upper-tailed: is the mean significantly above 48?
# (kept as the final expression so its result is the cell's output)
t_test(data1, hypoth_value=48, sig_level=0.05, test_type='upper')


large sample
Sample mean: 50.60582852075699
P Value = 0.4974609984410545
Results are insignificant.  Do Not Reject the Null
P Value = 0.32933701868279674
Results are insignificant.  Do Not Reject the Null
P Value = 0.002109341573010237
Results are significant.  Reject the Null


(2.9291627860623524, 0.002109341573010237)

In [59]:
# Small-sample case: reuse only the first five observations of data1
data2 = data1[:5]
print(data2)

print('small sample')
print(f'Sample mean: {stat.mean(data2)}')

# Two-tailed: does the mean differ significantly from 50?
t_test(data2, hypoth_value=50, sig_level=0.05, test_type='two')
# Lower-tailed: is the mean significantly below 51?
t_test(data2, hypoth_value=51, sig_level=0.05, test_type='lower')
# Upper-tailed: is the mean significantly above 48?
# (kept as the final expression so its result is the cell's output)
t_test(data2, hypoth_value=48, sig_level=0.05, test_type='upper')


[66.24345364 43.88243586 44.71828248 39.27031378 58.65407629]
small sample
Sample mean: 50.553712409836436
P Value = 0.918572770568147
Results are insignificant.  Do Not Reject the Null
P Value = 0.4671568669546634
Results are insignificant.  Do Not Reject the Null
P Value = 0.32103491333328793
Results are insignificant.  Do Not Reject the Null


(0.5019562701344552, 0.32103491333328793)

In [57]:
# Bare expression: the notebook renders the whole data1 array as this cell's output.
data1


array([66.24345364, 43.88243586, 44.71828248, 39.27031378, 58.65407629,
       26.98461303, 67.44811764, 42.38793099, 53.19039096, 47.50629625,
       64.62107937, 29.39859291, 46.77582796, 46.15945645, 61.33769442,
       39.00108733, 48.27571792, 41.22141582, 50.42213747, 55.82815214,
       38.99380823, 61.4472371 , 59.01590721, 55.02494339, 59.00855949,
       43.16272141, 48.77109774, 40.64230566, 47.3211192 , 55.30355467,
       43.08339248, 46.03246473, 43.128273  , 41.54794359, 43.28753869,
       49.87335401, 38.82689651, 52.34415698, 66.59802177, 57.42044161,
       48.08164448, 41.12371036, 42.52841706, 66.92454601, 50.50807755,
       43.63004353, 51.90915485, 71.00255136, 51.20158952, 56.1720311 ,
       53.0017032 , 46.47750154, 38.57481802, 46.50657278, 47.91105767,
       55.86623191, 58.38983414, 59.31102081, 52.85587325, 58.85141164,
       42.45602059, 62.52868155, 55.1292982 , 47.01907165, 54.88518147,
       49.24428287, 61.31629387, 65.19816816, 71.85575407, 36.03