In [1]:
import numpy as np
import pandas as pd
from scipy import stats

### Example : 1

A principal at a certain school claims that the students in his school are above average intelligence. 
A random sample of thirty students IQ scores have a mean score of 112.5. Is there sufficient evidence to support the principal’s claim? The mean population IQ is 100 with a standard deviation of 15.

H1 : The student in the school are having above average ($\mu > 100$)
    
H0 : The student in the school are having below average ($\mu \le  100$)    

In [5]:
n=30
sample_mean=112.5
pop_mean=100
pop_std=15

# $t = \frac{\bar{X} - \mu}{\frac{\sigma}{\sqrt{n-1}}}$

In [6]:
def t_score(n,pop_mean,pop_std,sample_mean):
    numerator=sample_mean - pop_mean
    denominator=pop_std / np.sqrt(n-1)
    return numerator/denominator

In [8]:
cal=t_score(n,pop_mean,pop_std,sample_mean)
cal

4.487637339278753

In [None]:
p_val = 1 - cdf(cal)   # one-tailed test

In [9]:
p_val = 1 - stats.t.cdf(cal,n-1)
p_val

5.253158558449211e-05

In [11]:
p_val =0.000053

In [12]:
if p_val <= 0.05:
    print('Reject H0 : The student in the school are having above average ( 𝜇>100 )')
else:
    print('Failed to Reject H0 : The student in the school are having below average ( 𝜇≤100 )')

Reject H0 : The student in the school are having above average ( 𝜇>100 )


### Example : 2

Blood glucose levels for obese patients have a mean of 100 with a standard deviation of 15. 
A researcher thinks that a diet high in raw cornstarch will have a positive or negative effect 
on blood glucose levels. A sample of 30 patients who have tried the raw cornstarch diet have a 
mean glucose level of 140. Test the hypothesis that the raw cornstarch had an effect.

H0 : The raw cornstarch had no effect

H1 : The raw cornstarch had an effect

In [13]:
n=30
pop_mean=100
sample_mean=140
sd=15

In [22]:
def t_score(n,pop_mean,sd,sample_mean):
    numerator=sample_mean - pop_mean
    denominator=sd / np.sqrt(n-1)
    return numerator/denominator

In [23]:
cal=t_score(n,pop_mean,pop_std,sample_mean)
cal

14.36043948569201

In [None]:
p_val = 2*(1 - cdf(cal))   # two-tailed test

In [31]:
p_val = 2* (1 - stats.norm.cdf(cal))
p_val

0.0

In [29]:
if p_val <= 0.05:
    print('Reject H0 : The raw cornstarch had an effect')
else:
    print('Failed to Reject H0 : The raw cornstarch had no effect')

Reject H0 : The raw cornstarch had an effect


### Chi-Square Test

In [32]:
import seaborn as sns

In [35]:
tips=sns.load_dataset('tips')
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [46]:
ct=pd.crosstab(tips['sex'],tips['smoker'])
ct

smoker,Yes,No
sex,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,60,97
Female,33,54


In [47]:
stats.chi2_contingency(ct)

(0.0,
 1.0,
 1,
 array([[59.84016393, 97.15983607],
        [33.15983607, 53.84016393]]))

In [48]:
stat, p, dof, expected = stats.chi2_contingency(ct)
  
# interpret p-value
alpha = 0.05
print("p value is " + str(p))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (H0 holds true)')

p value is 1.0
Independent (H0 holds true)


In [55]:
def chi_test(data,col1,col2):
    ct=pd.crosstab(data[col1],data[col2])
    ct
    stats.chi2_contingency(ct)
    stat, p, dof, expected = stats.chi2_contingency(ct)

    # interpret p-value
    alpha = 0.05
    print("p value is " + str(p))
    if p <= alpha:
        return('tip and {} are Dependent (reject H0)'.format(col2))
    else:
        return('tip and {} are Independent (H0 holds true)'.format(col2))

In [56]:
for i in tips.columns:
    print(chi_test(tips,'tip',i))

p value is 0.07795476511618318
tip and total_bill are Independent (H0 holds true)
p value is 0.0
tip and tip are Dependent (reject H0)
p value is 0.5825731193437201
tip and sex are Independent (H0 holds true)
p value is 0.7279546665100842
tip and smoker are Independent (H0 holds true)
p value is 0.9147886011626277
tip and day are Independent (H0 holds true)
p value is 0.8026198364134436
tip and time are Independent (H0 holds true)
p value is 0.03864648495551946
tip and size are Dependent (reject H0)
