In [1]:
#2 sample
#Population mean test, sd known
import numpy as np
from scipy import stats
import pandas as pd

In [10]:
def cal_z_value_2samp_sdknown(sample1_mean,sample2_mean,pop1_sd,
                            pop2_sd,sample1_size,sample2_size):
    pop1_var = pop1_sd**2
    pop2_var = pop2_sd**2
    combined_sd = np.sqrt(pop1_var/sample1_size + pop2_var/sample2_size)
    z_val = (sample1_mean-sample2_mean)/combined_sd
    return z_val


def cal_p_value(z_value):
    if z_value <= 0:
        p_val = stats.norm.cdf(z_value)
    else:
        p_val = 1 - stats.norm.cdf(z_value)
        
    return p_val

In [11]:
sample1_mean = 121
sample2_mean = 112
pop1_sd = 8
pop2_sd = 8
sample1_size = 10
sample2_size = 10

In [12]:
z_val = cal_z_value_2samp_sdknown(sample1_mean,sample2_mean,pop1_sd,
                                  pop2_sd,sample1_size,sample2_size)
print("z-value",z_val)
p_val = cal_p_value(z_val)
print("P-value",p_val)

z-value 2.5155764746872635
P-value 0.00594189462107364


In [18]:
#population mean test, sd unknown
#sd assumed equal
def cal_z_value_2samp_sdunknownequal(sample1_mean,sample2_mean,sample1_sd,
                                sample2_sd,sample1_size,sample2_size,dof):
    sample1_var = sample1_sd**2
    sample2_var = sample2_sd**2
    sample_var = ((sample1_size-1)*sample1_var + (sample2_size-1)*sample2_var)
    sample_var = sample_var/dof
    sample_sd = np.sqrt(sample_var)
    combined_sd = sample_sd*np.sqrt(1/sample1_size + 1/sample2_size)
    t_val = (sample1_mean - sample2_mean)/combined_sd
    return t_val

In [19]:
a = [91.50,94.18,92.18,95.39,91.79,89.07,94.72,89.21]
b = [89.19,90.95,90.46,93.21,97.19,97.04,91.07,92.75]
sample1_mean = np.mean(a)
sample2_mean = np.mean(b)
sample1_sd = np.std(a)
sample2_sd = np.std(b)
sample1_size = len(a)
sample2_size = len(b)
dof = sample1_size + sample2_size - 2
alpha = 0.05

In [20]:
#p-value approach
z_val = cal_z_value_2samp_sdunknownequal(sample1_mean,sample2_mean,sample1_sd,
                                         sample2_sd,sample1_size,sample2_size,dof)
print("Z-value",z_val)
p_val = cal_p_value(z_val)
print("P-value",p_val)

Z-value -0.37800453413619045
P-value 0.3527136128509373


In [22]:
#critical value method
critical_val = stats.t.ppf(alpha/2,dof)
print(critical_val)

-2.1447866879169277


In [23]:
#direct method
stats.ttest_ind(a,b,equal_var = True)

Ttest_indResult(statistic=-0.3535908643461798, pvalue=0.7289136186068217)

In [27]:
#population mean test, sd unknown
#assumed unequal
def cal_z_value_2samp_sdunknown_unequal(sample1_mean,sample2_mean,
                                        sample1_sd,sample2_sd,sample1_size,
                                        sample2_size):
    sample1_var = sample1_sd**2
    sample2_var = sample2_sd**2
    combined_sd = np.sqrt(sample1_var/sample1_size + sample2_var/sample2_size)
    z_val = (sample1_mean - sample2_mean)/combined_sd
    return z_val

def cal_dof_2samp_sdunknown_unequal(sample1_sd,sample2_sd, sample1_size,
                                    sample2_size):
    sample1_var = sample1_sd**2
    sample2_var = sample2_sd**2
    deno = (sample1_var/sample1_size)**2/(sample1_size-1)
    deno = deno + (sample2_var/sample2_size)**2/(sample2_size-1)
    num = (sample1_var/sample1_size) + (sample2_var/sample2_size)
    num = num**2
    dof = num//deno
    return dof

In [25]:
a = [3,7,25,10,15,6,12,25,15,7]
b = [48,44,40,38,33,21,20,12,1,18]
sample1_mean = np.mean(a)
sample2_mean = np.mean(b)
sample1_sd = np.std(a)
sample2_sd = np.std(b)
sample1_size = len(a)
sample2_size = len(b)

In [28]:
#p-value approach
z_val = cal_z_value_2samp_sdunknown_unequal(sample1_mean,sample2_mean,
                                        sample1_sd,sample2_sd,sample1_size,
                                        sample2_size)
print("z-value",z_val)
p_val = cal_p_value(z_val)
print("P-value",p_val)

z-value -2.9166104054345077
P-value 0.001769287316054508


In [31]:
#critical value approach
dof = cal_dof_2samp_sdunknown_unequal(sample1_sd,sample2_sd, sample1_size,
                                    sample2_size)
critical_value = stats.t.ppf(alpha/2,dof)
print(critical_val)

-2.1447866879169277


In [32]:
#direct method
stats.ttest_ind(a,b,equal_var = False)

Ttest_indResult(statistic=-2.7669395785560558, pvalue=0.015827284816100885)

In [37]:
# 2 sample population proportion
def cal_z_value_2samp_prop(sample1_prop,sample2_prop,sample1_size,
                           sample2_size):
    p_bar = sample1_size*sample1_prop + sample2_size*sample2_prop
    p_bar = p_bar/(sample1_size+sample2_size)
    combined_sd = p_bar*(1-p_bar)*(1/sample1_size + 1/sample2_size)
    combined_sd = np.sqrt(combined_sd)
    z_val = (sample1_prop-sample2_prop)/combined_sd
    return z_val

In [34]:
sample1_prop = 27/100
sample2_prop = 19/100
sample1_size = 100
sample2_size = 100

In [40]:
#p-value approach
z_val = cal_z_value_2samp_prop(sample1_prop,sample2_prop,sample1_size,
                             sample2_size)
print("Z-value",z_val)
p_val = cal_p_value(z_val)
print("P-value",p_val*2)

Z-value 1.3442056254198995
P-value 0.17888190308175567
