# One-sample T-test

# numpy must be imported before np.mean is used below; with the import
# commented out this cell raised NameError when run top to bottom.
import numpy as np

# Population of ages and its mean.
ages = [10, 20, 35, 50, 28, 40, 55, 18, 16, 55, 30, 25, 43, 18, 30, 28,
        14, 24, 16, 17, 32, 35, 26, 27, 65, 18, 43, 23, 21, 20, 19, 70]
ages_mean = np.mean(ages)

In [2]:
# Draw a random sample of ages; np.random.choice samples with replacement
# by default.
sample_size = 10
age_sample = np.random.choice(ages, size=sample_size)

In [5]:
from scipy.stats import ttest_1samp

# One-sample t-test of the sample mean against a hypothesised mean of 30.
ttest, p_value = ttest_1samp(age_sample, popmean=30)

# Report the decision at the 0.05 significance level.
print("Null hypothesis rejected" if p_value < 0.05 else "Null hypothesis accepted")

# Example

In [12]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import math
# Seed NumPy's global generator so the draws below are reproducible.
np.random.seed(6)

# Poisson-distributed ages shifted by 18: the whole school (1500 draws,
# mu=35) followed by class A (60 draws, mu=30). The draw order matters for
# reproducibility, so it is kept as-is.
school_ages = stats.poisson.rvs(mu=35, loc=18, size=1500)
classA_ages = stats.poisson.rvs(mu=30, loc=18, size=60)

In [14]:
# One-sample t-test: compare class A's mean age with the school-wide mean.
_, p_value = stats.ttest_1samp(classA_ages, popmean=school_ages.mean())

In [16]:
# Report the decision at the 0.05 significance level.
print("Null hypothesis rejected" if p_value < 0.05 else "Null hypothesis accepted")

Null hypothesis rejected


# Two-sample T-test

In [18]:
# Re-seed the global generator, then draw 60 Poisson ages (mu=33, shifted
# by 18) for class B.
np.random.seed(12)
classB_ages = stats.poisson.rvs(mu=33, loc=18, size=60)

In [20]:
# Independent two-sample t-test between class A and class B;
# equal_var=False selects Welch's test (no equal-variance assumption).
_, p_value = stats.ttest_ind(classA_ages, classB_ages, equal_var=False)

In [21]:
# Report the decision at the 0.05 significance level.
print("Null hypothesis rejected" if p_value < 0.05 else "Null hypothesis accepted")

Null hypothesis rejected


# Paired T-test

In [28]:
# First set of weights (15 values).
weight1 = [25, 30, 23, 34, 24, 23, 45, 23, 43, 35, 35, 24, 34, 24, 31]

# Second set: the first weights plus normally distributed noise
# (mean -1.25, standard deviation 5). list + ndarray adds elementwise and
# yields an ndarray.
weight2 = weight1 + stats.norm.rvs(loc=-1.25, scale=5, size=15)

In [29]:
# Table of both weight series and their per-row difference (second - first).
w_before = np.array(weight1)
w_after = np.array(weight2)
weight_df = pd.DataFrame(
    {
        "w10": w_before,
        "w20": w_after,
        "weigt_change": w_after - w_before,
    }
)

In [30]:
# Bare expression: in a notebook cell this renders the weight table.
weight_df

Unnamed: 0,w10,w20,weigt_change
0,25,30.579265,5.579265
1,30,34.910224,4.910224
2,23,24.004446,1.004446
3,34,29.542951,-4.457049
4,24,15.86202,-8.13798
5,23,26.578732,3.578732
6,45,37.329983,-7.670017
7,23,15.377139,-7.622861
8,43,49.364209,6.364209
9,35,41.059412,6.059412


In [31]:
# Paired (related-samples) t-test on the two weight series.
_, p_value = stats.ttest_rel(weight1, weight2)

In [32]:
# Report the decision at the 0.05 significance level.
print("Null hypothesis rejected" if p_value < 0.05 else "Null hypothesis accepted")

Null hypothesis accepted


# Chi-square test

In [54]:
import scipy.stats as stats
import seaborn as sns
import pandas as pd
import numpy as np
# Load seaborn's "tips" example dataset.
dataset = sns.load_dataset("tips")

In [34]:
# Preview the first rows of the dataset.
dataset.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [35]:
# Contingency table of counts: rows are sex, columns are smoker status.
dataset_table = pd.crosstab(index=dataset['sex'], columns=dataset['smoker'])
dataset_table

smoker,Yes,No
sex,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,60,97
Female,33,54


In [36]:
# Observed cell counts of the contingency table as a plain NumPy array.
Observed_vals = dataset_table.to_numpy()
Observed_vals

array([[60, 97],
       [33, 54]], dtype=int64)

In [37]:
# Chi-square test of independence on the contingency table.
val = stats.chi2_contingency(observed=dataset_table)

In [38]:
# Bare expression: in a notebook cell this shows the chi2_contingency result.
val

(0.008763290531773594, 0.925417020494423, 1, array([[59.84016393, 97.15983607],
        [33.15983607, 53.84016393]]))

In [39]:
# Index 3 of the chi2_contingency result is the array of expected frequencies.
expected_val = val[3]

In [48]:
# Degrees of freedom of a contingency table: (rows - 1) * (columns - 1).
# The dimensions are read from .shape rather than through hard-coded 0:2
# slices, so the result stays correct for tables larger than 2x2.
rows, cols = dataset_table.shape
ddof = (rows - 1) * (cols - 1)

# Significance level used for the critical value and the decision.
alpha = 0.05

In [50]:
from scipy.stats import chi2

# Pearson chi-square contributions (O - E)^2 / E, summed down each column.
# No continuity correction is applied here, whereas
# scipy.stats.chi2_contingency applies Yates' correction by default to 2x2
# tables, so the statistic it reports differs from this one.
chi_square = (
    (np.asarray(Observed_vals) - np.asarray(expected_val)) ** 2.
    / np.asarray(expected_val)
).sum(axis=0)

# Total over every column; the earlier chi_square[0] + chi_square[1]
# silently ignored any column beyond the second.
chi_square_stat = chi_square.sum()

In [51]:
# Bare expression: in a notebook cell this shows the chi-square statistic.
chi_square_stat

0.001934818536627623

In [52]:
# ppf is the percent point function, i.e. the inverse of the CDF (cumulative
# distribution function). Evaluated at 1 - alpha it gives the critical value
# for significance level alpha.
critical_val = chi2.ppf(1 - alpha, df=ddof)
critical_val

3.841458820694124

In [53]:
# Right-tail p-value of the chi-square statistic. The survival function sf
# is mathematically 1 - cdf but avoids the precision loss of the explicit
# subtraction when the tail probability is very small.
p_val = chi2.sf(chi_square_stat, df=ddof)
print("p_value",p_val)
print('significance level', alpha)
print("Degree of Freedom", ddof)


p_value 0.964915107315732
significance level 0.05
Degree of Freedom 1
