In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import scipy

# ***One Tail Test***

Example : - An outbreak of Salmonella-related illness was attributed to ice cream produced at a certain factory. Scientists measured the level of Salmonella in 9 randomly sampled batches of ice cream. The levels (in MPN/g)were 0.593,0.142,0.329,0.691,0.231,0.793,0.519,0.392,0.418.

Is there evidence that the mean level of Salmonella in the ice cream is greater than 0.3 MPN/g

In [2]:
data = pd.Series([0.593,0.142,0.329,0.691,0.231,0.793,0.519,0.392,0.418])
data

0    0.593
1    0.142
2    0.329
3    0.691
4    0.231
5    0.793
6    0.519
7    0.392
8    0.418
dtype: float64

In [3]:
scipy.stats.ttest_1samp(data,0.3)
# This will give us 2 tail P value

TtestResult(statistic=2.2050588385131595, pvalue=0.05853032968489765, df=8)

In [4]:
#We want 1 tail P value so we need to devide P value by 2

s,p = scipy.stats.ttest_1samp(data,0.3)
p_value = p/2
p_value

0.029265164842448826

Confidence level = 95 % so α = 0.05 & p = 0.029
 null hypothesis µ < 0.3
 alternate hypothesis µ> 0.3

**α > P so we reject null hypothesis.**

**We conclude that icecream factory is causing problem and because of this ice cream there is a  outbreak of Salmonella-related illness.**


# ***Two Tail Test***

Example : Six subjects were given a drug (treatment group ) and additional six subjects a placebo (control group).Their reaction time to a stimulus was measured (in ms). We want to perform a two_sample t-test for comparing the means of the treatment and control groups.

(1) Control:- 91,87,99,77,88,91

(2) Treat :- 101,110,103,93,99,104


In [5]:
control = pd.Series([91,87,99,77,88,91])

treat = pd.Series([101,110,103,93,99,104])

In [6]:
stats.ttest_ind(control,treat)

Ttest_indResult(statistic=-3.4456126735364876, pvalue=0.006272124350809803)

α = 0.005 & p = 0.0062

p<α so reject null hypothesis.

we conclude that there is difference between mean of sample1 and sample 2 and drug given to treatment group having actual effect of drug.


# ***2 Proportion Test***

 ***Usecase :-*** Is there a significant difference between the population propotions of state 1 and state 2 who report that they have been placed immediately after education ?

 **Populations :**All students who have completed graduation and post graduation in both states

 ***Parameter of Interest :   ***  p1 - p2, where p1 = state 1  & p2 = state 2

 ***Data :   ***  247 students from state 1. 36.8 % of students report that they have got the job.

 308 students from state 2. 38.9% of students report that they have got the job.

Hypothesis Defination:   

Null Hypothesis :  p1-p2 =0

Alternative Hypothesis :   p1-p2 ≠ 0


In [7]:
import numpy as np

In [8]:
#Data:

n1 = 247
p1 = .37

n2 = 308
p2 = .39

In [9]:
population1 = np.random.binomial(1, p1, n1)
population2 = np.random.binomial(1, p2, n2)

In [10]:
population1

array([0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1,
       0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1,
       0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0,
       0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0,
       1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
       0, 1, 1, 1, 0])

In [11]:
len(population1)

247

In [12]:
population2

array([1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,
       0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1,
       0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1,
       1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1,

In [13]:
population1.mean()

0.38461538461538464

In [14]:
population2.mean()

0.3961038961038961

In [15]:
import statsmodels.api as sm
sm.stats.ttest_ind(population1, population2)

(-0.27516403893146046, 0.783292991260126, 553.0)

p = 0.783   and α = 0.05

p > α so fail to reject null hypothesis

