In [11]:
from scipy.stats import norm
from module4 import *
from module5 import *

def test_hypothesis(z, alternative, alpha,null=""):
    if alternative == 'left':
        # Left-tailed test
        print_latex(f"$P(Z\\le {{z}}) = norm.cdf({z}) = {norm.cdf(z)}$")
        p_value = norm.cdf(z)
    elif alternative == 'right':
        # Right-tailed test
        print_latex(f"$P(Z\\ge {{z}}) = 1 - P(Z\\le {{z}}) = 1-norm.cdf({z}) = {1 - norm.cdf(z)}$")
        p_value = 1 - norm.cdf(z)
    elif alternative == 'two-sided':
        # Two-tailed test
        print_latex(f"$P(Z\\le -{{z}})+ P(Z\\ge {{z}}) = 2*(1-norm.cdf(|{z}|)) = {2*(1 - norm.cdf(abs(z)))}$")
        p_value = 2*(1 - norm.cdf(abs(z)))
    else:
        return "Invalid alternative hypothesis. It should be 'less', 'greater' or 'two-sided'"
    print(f"the test statistic is {z:.4f} and the p-value is {p_value:.4f}.")
    if p_value < alpha:
        decision = "reject the null hypothesis"
        print(f"Since, {p_value} < {alpha}, we reject the null hypothesis that {null}.")
    elif p_value == alpha:
        decision = "reject the null hypothesis"
        print(f"Since, {p_value} = {alpha}, we reject the null hypothesis that {null}.")
    else:
        decision = "fail to reject the null hypothesis"
        print(f"Since, {p_value} > {alpha}, we fail to reject the null hypothesis that {null}.")

    return p_value, decision

def get_test_statistic(x, u, se):
    """Display the z-statistic formula with its substitution and return z.

    ``x`` is the observed estimate, ``u`` the value it is compared against and
    ``se`` the standard error; the returned value is ``(x - u) / se``.
    The estimate and reference value are shown rounded to two decimals, the
    standard error and the result are shown unrounded.
    """
    general_formula = "$z = \\frac{\\bar{x} - \\mu}{SE}$"
    print("Given the formula for test statistic:")
    print_latex(general_formula)
    print("We can calculate the test statistic as:")

    # Build the pieces of the substituted formula, then the value itself.
    shown_difference = f"{x:.2f} - {u:.2f}"
    shown_se = f"{se}"
    z_value = (x - u) / se
    print_latex(f"$z = \\frac{{{shown_difference}}}{{{shown_se}}} = {z_value}$")
    return z_value

# Example 1: p-values and decisions for three given z statistics, one per tail.
print("Example 1")
z_values = [1.15, 2.78, -1.81]
alternatives = ['right', 'two-sided', 'left']

for z, alternative in zip(z_values, alternatives):
    p_value, decision = test_hypothesis(z, alternative, alpha=0.05)
    print(f"For z = {z} and alternative = '{alternative}', p-value = {p_value:.4f} and we {decision}.")


# Example 2: two-sided test of a mean at the 10% level.
print("\nExample 2")

test_hypothesis(
    get_test_statistic(x=6.1, u=6, se=get_standard_error_mean(n=30, sd=0.4)),
    'two-sided',
    alpha=0.10,
    null="it will take an average of 6 days for us to complete the assignment",
)


# Example 3: left-tailed test of a mean at the 5% level.
print("\nExample 3")
# Hand computation of the same statistic; it is not passed to the call below,
# which derives the value step by step through the helper functions.
z = (36.7 - 37) / (0.6 / (50 ** 0.5))

# The null text must describe H0, not the alternative: for a left-tailed test
# the claim being tested against is that the mean is NOT lower than 37.
test_hypothesis(
    get_test_statistic(x=36.7, u=37, se=get_standard_error_mean(n=50, sd=0.6)),
    'left',
    alpha=0.05,
    null="those who are 30 years old have an average body temperature that is not lower than the commonly accepted average human temperature of 37 degrees Celsius",
)


Example 1


<IPython.core.display.Latex object>

the test statistic is 1.1500 and the p-value is 0.1251.
Since, 0.12507193563715036 > 0.05, we fail to reject the null hypothesis that .
For z = 1.15 and alternative = 'right', p-value = 0.1251 and we fail to reject the null hypothesis.


<IPython.core.display.Latex object>

the test statistic is 2.7800 and the p-value is 0.0054.
Since, 0.005435889845402553 < 0.05, we reject the null hypothesis that .
For z = 2.78 and alternative = 'two-sided', p-value = 0.0054 and we reject the null hypothesis.


<IPython.core.display.Latex object>

the test statistic is -1.8100 and the p-value is 0.0351.
Since, 0.03514789358403879 < 0.05, we reject the null hypothesis that .
For z = -1.81 and alternative = 'left', p-value = 0.0351 and we reject the null hypothesis.

Example 2


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Given the formula for test statistic:


<IPython.core.display.Latex object>

We can calculate the test statistic as:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

the test statistic is 1.3693 and the p-value is 0.1709.
Since, 0.1709035202307989 > 0.1, we fail to reject the null hypothesis that it will take an average of 6 days for us to complete the assignment.

Example 3


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Given the formula for test statistic:


<IPython.core.display.Latex object>

We can calculate the test statistic as:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

the test statistic is -3.5355 and the p-value is 0.0002.
Since, 0.00020347600872250467 < 0.05, we reject the null hypothesis that those who are 30 years old have an average body temperature that is lower than the commonly accepted average human temperature of 37 degrees Celsius.


(0.00020347600872250467, 'reject the null hypothesis')

In [12]:
# Summary values for the first sample (used below as mean, sd and size).
x1 = 2.7
s1 = 0.6
n1 = 100
# Summary values for the second sample.
x2 = 2.54
s2 = 0.7
n2 = 100
def print_hypothesis(about="",u=0,alternative="\\neq"):
    """Render the null and alternative hypotheses as two LaTeX lines.

    ``about`` selects the parameter being tested (``"mean"``, ``"two means"``,
    ``"proportion"`` or ``"two proportions"``), ``u`` is the hypothesised value
    and ``alternative`` is the LaTeX relation used in the alternative
    hypothesis. Any other ``about`` prints "Invalid hypothesis" instead.
    """
    # Each supported subject paired with the LaTeX for its parameter.
    parameter_symbols = (
        ("mean", "\\mu"),
        ("two means", "\\mu_1 - \\mu_2"),
        ("proportion", "p"),
        ("two proportions", "p_1 - p_2"),
    )
    for subject, symbol in parameter_symbols:
        if about == subject:
            print_latex(f"$H_0: {symbol} = {u}$")
            print_latex(f"$H_A: {symbol} {alternative} {u}$")
            return
    print("Invalid hypothesis")


# State the hypotheses about the difference between the two means.
print_hypothesis(about="two means", u=0, alternative="\\neq")


# Two-sided test at the 5% level on the observed difference x1 - x2.
test_hypothesis(
    get_test_statistic(
        x=x1 - x2,
        u=0,
        se=get_standard_error_two_means(sd_1=s1, sd_2=s2, n_1=n1, n_2=n2)[0],
    ),
    alternative='two-sided',
    alpha=0.05,
    null="there is no difference between means",
)



<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Given the formula for test statistic:


<IPython.core.display.Latex object>

We can calculate the test statistic as:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

the test statistic is 1.7354 and the p-value is 0.0827.
Since, 0.08266225205671818 > 0.05, we fail to reject the null hypothesis that there is no difference between means.


(0.08266225205671818, 'fail to reject the null hypothesis')

In [13]:
# Hypothesised proportion, sample size and observed sample proportion.
p=0.2
n=100
phat=15/100
print_hypothesis(about="proportion",alternative="<",u=p)
# Left-tailed test at the 10% level. The null text has no leading space, so the
# printed conclusion reads "... hypothesis that 20% ..." with a single space.
test_hypothesis(
    get_test_statistic(x=phat, u=p, se=get_standard_error_proportion(p=p, n=n)),
    alternative="left",
    alpha=0.10,
    null="20% of adults over 40yrs old participate in fitness activities at least twice a week",
)


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Given the formula for test statistic:


<IPython.core.display.Latex object>

We can calculate the test statistic as:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

the test statistic is -1.2500 and the p-value is 0.1056.
Since, 0.10564977366685518 > 0.1, we fail to reject the null hypothesis that  20% of adults over 40yrs old participate in fitness activities at least twice a week.


(0.10564977366685518, 'fail to reject the null hypothesis')

In [14]:
def get_standard_error_two_proportions(p_1=None, p_2=None, p=None, n_1=None, n_2=None,hypothesis=True):
    """Show and return the standard error of a difference in two proportions.

    Parameters
    ----------
    p_1, p_2 : float, optional
        Sample proportions. Required unless ``hypothesis`` is true and a
        pooled ``p`` is supplied.
    p : float, optional
        Pooled proportion. Only used when ``hypothesis`` is true; when omitted
        it is computed as ``(p_1*n_1 + p_2*n_2) / (n_1 + n_2)``.
    n_1, n_2 : int
        Sample sizes (always required).
    hypothesis : bool, optional
        When true, use the pooled formula
        ``sqrt(p(1-p)(1/n_1 + 1/n_2))``; otherwise use
        ``sqrt(p_1(1-p_1)/n_1 + p_2(1-p_2)/n_2)``.

    Returns
    -------
    float
        The standard error.

    Raises
    ------
    TypeError
        If a required input is missing. The check runs before anything is
        printed, so a bad call does not leave a half-rendered derivation.

    Notes
    -----
    ``print_latex`` is not defined in this cell; it is presumably provided by
    the notebook's star imports.
    """
    if n_1 is None or n_2 is None:
        raise TypeError("n_1 and n_2 are required")
    samples_needed = (not hypothesis) or p is None
    if samples_needed and (p_1 is None or p_2 is None):
        raise TypeError(
            "p_1 and p_2 are required unless hypothesis is true and a pooled p is given"
        )

    print("Given the formula for standard error of difference in proportions:")
    print_latex("$SE = \\sqrt{\\frac{p_1(1-p_1)}{n_1} + \\frac{p_2(1-p_2)}{n_2}}$")
    if hypothesis:
        print("It can be shown that the standard error of difference in proportions under the null hypothesis is:")
        print_latex("$SE = \\sqrt{p(1-p) \\left( \\frac{1}{n_1} + \\frac{1}{n_2} \\right)}$")
        print("where p is the pooled proportion:")
        print_latex("$p = \\frac{p_1n_1 + p_2n_2}{n_1 + n_2}$")
        if p is None:
            p = (p_1 * n_1 + p_2 * n_2) / (n_1 + n_2)
            print_latex(f"$p = \\frac{{{p_1} \\times {n_1} + {p_2} \\times {n_2}}}{{{n_1} + {n_2}}} = {p:.4f}$")
        else:
            print_latex(f"$p = {p:.4f}$")
        se = (p * (1 - p) * (1 / n_1 + 1 / n_2)) ** 0.5
        # Show p with the same four decimals as the line above so floating
        # point noise does not leak into the rendered formula.
        print_latex(f"$SE = \\sqrt{{{p:.4f} \\times (1 - {p:.4f}) \\left( \\frac{{1}}{{{n_1}}} + \\frac{{1}}{{{n_2}}} \\right)}} = {se:.4f}$")
        return se

    # Unpooled standard error: each sample contributes its own variance term.
    se = (p_1 * (1 - p_1) / n_1 + p_2 * (1 - p_2) / n_2) ** 0.5
    print("Solving for SE we get:")
    print_latex(f"$SE = \\sqrt{{\\frac{{{p_1} \\times (1 - {p_1})}}{{{n_1}}} + \\frac{{{p_2} \\times (1 - {p_2})}}{{{n_2}}}}} = {se:.4f}$")
    return se

# Observed proportion and sample size for each of the two groups.
p1,n1=52/1000,1000
p2,n2=23/1000,1000
print_hypothesis(about="two proportions",u=0,alternative=">")
# Right-tailed test at the 5% level using the pooled standard error.
# The null text is worded grammatically ("there is no difference") because it
# is printed verbatim in the conclusion.
test_hypothesis(
    z=get_test_statistic(
        x=p1 - p2,
        u=0,
        se=get_standard_error_two_proportions(p_1=p1, p_2=p2, n_1=n1, n_2=n2, hypothesis=True),
    ),
    alpha=0.05,
    alternative="right",
    null="there is no difference in the proportion of men and women who were admitted to the hospital due to heart disease",
)


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Given the formula for standard error of difference in proportions:


<IPython.core.display.Latex object>

It can be shown that the standard error of difference in proportions under the null hypothesis is:


<IPython.core.display.Latex object>

where p is the pooled proportion:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Given the formula for test statistic:


<IPython.core.display.Latex object>

We can calculate the test statistic as:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

the test statistic is 3.4132 and the p-value is 0.0003.
Since, 0.00032097532545860563 < 0.05, we reject the null hypothesis that the are no difference in the proportion of men and women who were admitted to the hospital due to heart disease.


(0.00032097532545860563, 'reject the null hypothesis')

In [15]:
# Inputs in the order used below: reference mean, sample mean, sample size, sd.
u = 300
x1 = 292
n1 = 50
sd = 20
print_hypothesis(about="mean", u=u, alternative="\\neq")
# Two-sided test at the 5% level.
test_hypothesis(
    z=get_test_statistic(
        x=x1,
        u=u,
        se=get_standard_error_mean(n=n1, sd=sd),
    ),
    alpha=0.05,
    alternative="two-sided",
    null="the mean run time is 300 minutes",
)

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Given the formula for test statistic:


<IPython.core.display.Latex object>

We can calculate the test statistic as:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

the test statistic is -2.8284 and the p-value is 0.0047.
Since, 0.004677734981047177 < 0.05, we reject the null hypothesis that the mean run time is 300 minutes.


(0.004677734981047177, 'reject the null hypothesis')

In [16]:
# Sample sizes of the two groups.
n1 = 30
n2 = 25
# Mean and standard deviation of each group.
x1 = 78
sd1 = 10
x2 = 85
sd2 = 15
alpha = 0.1
print_hypothesis(about="two means", u=0, alternative="\\neq")
# Two-sided test on the difference x1 - x2 at the chosen significance level.
test_hypothesis(
    z=get_test_statistic(
        x=x1 - x2,
        u=0,
        se=get_standard_error_two_means(sd_1=sd1, sd_2=sd2, n_1=n1, n_2=n2)[0],
    ),
    alpha=alpha,
    alternative="two-sided",
    null="there is no difference in the mean scores of the two groups",
)

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Given the formula for test statistic:


<IPython.core.display.Latex object>

We can calculate the test statistic as:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

the test statistic is -1.9932 and the p-value is 0.0462.
Since, 0.0462360714437442 < 0.1, we reject the null hypothesis that there is no difference in the mean scores of the two groups.


(0.0462360714437442, 'reject the null hypothesis')

In [19]:
# Means, standard deviations and sizes of the two samples.
x1 = 34.1
x2 = 36
sd1 = 5.9
sd2 = 6
n1 = 120
n2 = 120
# Reference value for the first sample's mean, and the significance level.
u_a = 33
alpha = 0.01

print_hypothesis(about="mean", alternative=">", u=u_a)
# Right-tailed test of the first sample's mean against u_a.
test_hypothesis(
    z=get_test_statistic(
        x=x1,
        u=u_a,
        se=get_standard_error_mean(n=n1, sd=sd1),
    ),
    alternative="right",
    alpha=alpha,
    null="the mean lead concentration in Section A is not higher than 33ppm",
)



<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Given the formula for test statistic:


<IPython.core.display.Latex object>

We can calculate the test statistic as:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

the test statistic is 2.0424 and the p-value is 0.0206.
Since, 0.02055815020592089 > 0.01, we fail to reject the null hypothesis that the mean lead concentration in Section A is not higher than 33ppm.


(0.02055815020592089, 'fail to reject the null hypothesis')

In [23]:
# NOTE: x1, x2, sd1, sd2, n1 and n2 are not assigned in this cell; they must
# already be defined by an earlier cell before this one is run.
alpha=0.05
print_hypothesis(about="two means",alternative=">",u=5)
# Right-tailed test of whether the absolute difference in means exceeds 5.
# The null text must describe H0 ("not more than 5ppm"); stating the
# alternative here would make the printed conclusion say the opposite of what
# the test decided.
test_hypothesis(
    z=get_test_statistic(
        x=abs(x1 - x2),
        u=5,
        se=get_standard_error_two_means(sd_1=sd1, sd_2=sd2, n_1=n1, n_2=n2)[0],
    ),
    alternative="right",
    alpha=alpha,
    null="the difference in mean lead concentration between the two sections is not more than 5ppm",
)


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Given the formula for test statistic:


<IPython.core.display.Latex object>

We can calculate the test statistic as:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

the test statistic is -4.0356 and the p-value is 1.0000.
Since, 0.9999727651971948 > 0.05, we fail to reject the null hypothesis that the difference between the mean lead concentration between the two sections is more than 5ppm.


(0.9999727651971948, 'fail to reject the null hypothesis')

In [25]:
# Hypothesised proportion, sample size, observed proportion, significance level.
p = 0.8
n = 50
phat = 37 / 50
alpha = 0.05
print_hypothesis(about="proportion", alternative="<", u=p)
# Left-tailed test of the observed proportion against p.
test_hypothesis(
    z=get_test_statistic(
        x=phat,
        u=p,
        se=get_standard_error_proportion(p=p, n=n),
    ),
    alternative="left",
    alpha=alpha,
    null="drug-dose level will induce sleep for at least 80% of people suffering from insomnia",
)



<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Given the formula for test statistic:


<IPython.core.display.Latex object>

We can calculate the test statistic as:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

the test statistic is -1.0607 and the p-value is 0.1444.
Since, 0.14442218317324224 > 0.05, we fail to reject the null hypothesis that drug-dose level will induce sleep for at least 80% of people suffering from insomnia.


(0.14442218317324224, 'fail to reject the null hypothesis')

In [26]:
# Sample sizes and observed proportions for the two groups.
n1 = 200
n2 = 200
p1 = 16 / 200
p2 = 8 / 200
alpha = 0.05
print_hypothesis(about="two proportions", alternative="\\neq", u=0)
# Two-sided test on p1 - p2 using the pooled standard error.
test_hypothesis(
    z=get_test_statistic(
        x=p1 - p2,
        u=0,
        se=get_standard_error_two_proportions(p_1=p1, p_2=p2, n_1=n1, n_2=n2, hypothesis=True),
    ),
    alternative="two-sided",
    alpha=alpha,
    null="there is no difference in the performance of the machine types",
)

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Given the formula for standard error of difference in proportions:


<IPython.core.display.Latex object>

It can be shown that the standard error of difference in proportions under the null hypothesis is:


<IPython.core.display.Latex object>

where p is the pooled proportion:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

Given the formula for test statistic:


<IPython.core.display.Latex object>

We can calculate the test statistic as:


<IPython.core.display.Latex object>

<IPython.core.display.Latex object>

the test statistic is 1.6843 and the p-value is 0.0921.
Since, 0.0921229632576519 > 0.05, we fail to reject the null hypothesis that there is no difference in the performance of the machine types.


(0.0921229632576519, 'fail to reject the null hypothesis')