In [1]:
import numpy as np
from scipy import stats
import pandas as pd

In [10]:
#1 sample
#Test of population mean, Population sd known
#one-tailed test
def cal_z_score_1samp(sample_mean,pop_mean,sample_size,
                      pop_sd):
    sample_sd = pop_sd/np.sqrt(sample_size)
    z_score = (sample_mean-pop_mean)/sample_sd
    return z_score

def cal_p_value(z_value):
    if z_value <= 0:
        p_val = stats.norm.cdf(z_value)
    else:
        p_val = 1 - stats.norm.cdf(z_value)
        
    return p_val

def cal_critical_value(confidence,flag):
    if flag==True:
        critical_val = stats.norm.ppf(confidence)
    else:
        critical_val = stats.norm.ppf(1-confidence)
    
    return critical_val

In [8]:
sample_mean = 32
pop_mean = 30
pop_sd = 10
sample_size = 30
alpha = 0.05
flag = False

In [6]:
#P-value approach
z_val = cal_z_score_1samp(sample_mean,pop_mean,sample_size,
                          pop_sd)
print("Z-value",z_val)
p_val = cal_p_value(z_val)
print("P-value",p_val)
if p_val < alpha:
    print("We reject null hypothesis")
else:
    print("We do not reject null hypothesis")

Z-value 1.0954451150103321
P-value 0.13666083914614902
We do not reject null hypothesis


In [11]:
#Critical value approach
critical_val = cal_critical_value(alpha,flag)
print(critical_val)

1.6448536269514722


In [12]:
#Test of population mean
#Two-tail test
sample_mean = 505
pop_mean = 500
pop_sd = 10
sample_size = 30
alpha = 0.03

In [14]:
#p-value approach
z_val = cal_z_score_1samp(sample_mean,pop_mean,sample_size,
                          pop_sd)
print("Z-value",z_val)
p_val = cal_p_value(z_val)
print("P-value",p_val)
if p_val < alpha/2:
    print("We reject null hypothesis")
else:
    print("We do not reject null hypothesis")

Z-value 2.7386127875258306
P-value 0.0030849496602720627
We reject null hypothesis


In [15]:
#Critical-value approach
critical_val = cal_critical_value(alpha/2,flag = True)
print(critical_val)

-2.1700903775845606


In [18]:
#Test of population mean, Population sd unknown
path = 'icecream sale data.xlsx'
df_icecream = pd.read_excel(path,engine = 'openpyxl')
def cal_z_value_1samp_sd_unknown(sample_mean,pop_mean,sample_sd,
                                 sample_size):
    combined_sd = sample_sd/np.sqrt(sample_size)
    z_val = (sample_mean-pop_mean)/combined_sd
    return z_val
df_icecream.head()

Unnamed: 0,Day,Number of ice cream sold
0,1,13
1,2,8
2,3,10
3,4,10
4,5,8


In [23]:
x = df_icecream['Number of ice cream sold'].values
sample_mean = np.mean(x)
sample_sd = np.std(x)
sample_size = len(x)
dof = sample_size-1
alpha = 0.05
pop_mean = 10
print(x)

[13  8 10 10  8  9 10 11  6  8 12 11 11 12 10 12  7 10 11  8]


In [24]:
z_val = cal_z_value_1samp_sd_unknown(sample_mean,pop_mean,sample_sd,
                                   sample_size)
print("Z-value",z_val)
p_val = cal_p_value(z_val)
print("P-value",p_val)

Z-value -0.3677453795254057
P-value 0.35653155000268266


In [25]:
critical_z_value = stats.t.ppf(alpha,dof)
print(critical_z_value)

-1.7291328115213678


In [26]:
#direct method
stats.ttest_1samp(x,pop_mean)

Ttest_1sampResult(statistic=-0.35843385854878496, pvalue=0.7239703579964252)

In [27]:
#Population proportion test
def cal_z_value_1samp_prop(sample_prop,pop_prop,
                               sample_size):
    sample_sd = np.sqrt(pop_prop*(1-pop_prop)/sample_size)
    z_val = (sample_prop-pop_prop)/sample_sd
    return z_val

count = 67
sample_size = 120
sample_prop = count/sample_size
pop_prop = 0.5
alpha = 0.05

In [28]:
z_val = cal_z_value_1samp_prop(sample_prop,pop_prop,sample_size)
print("Z-value",z_val)
p_val = cal_p_value(z_val)
print("P-value",p_val)

Z-value 1.278019300845388
P-value 0.10062131047886202


In [30]:
#direct method
from statsmodels.stats.proportion import proportions_ztest
proportions_ztest(count,sample_size,pop_prop)

(1.286806739751111, 0.1981616572238455)