In [1]:
import pandas as pd
import scipy.stats as stats
import numpy as np

##### Procedure for Hypothesis Testing.
* Define null and alternative hypothesis.
* Identify the test statistic to be used for testing the validity of null hypothesis,Z-test or t-test.
* Decide the significance value(Alpha),Typical value used alpha=0.05.
* calculate the p-value , which is the conditional probability of observing the test statistic when null hypothesis is   
  true.
* Take the decision based on p-value.

## Cutlet Market Data

In [2]:
Cutlet_data = pd.read_csv('Cutlets.csv')
Cutlet_data

Unnamed: 0,Unit A,Unit B
0,6.809,6.7703
1,6.4376,7.5093
2,6.9157,6.73
3,7.3012,6.7878
4,7.4488,7.1522
5,7.3871,6.811
6,6.8755,7.2212
7,7.0621,6.6606
8,6.684,7.2402
9,6.8236,7.0503


In [3]:
Cutlet_data.shape

(35, 2)

In [4]:
Cutlet_data.describe()

Unnamed: 0,Unit A,Unit B
count,35.0,35.0
mean,7.019091,6.964297
std,0.288408,0.343401
min,6.4376,6.038
25%,6.8315,6.7536
50%,6.9438,6.9399
75%,7.28055,7.195
max,7.5169,7.5459


In [5]:
unit_a = Cutlet_data[['Unit A']]
unit_a

Unnamed: 0,Unit A
0,6.809
1,6.4376
2,6.9157
3,7.3012
4,7.4488
5,7.3871
6,6.8755
7,7.0621
8,6.684
9,6.8236


In [6]:
np.mean(unit_a)

Unit A    7.019091
dtype: float64

In [7]:
unit_b = Cutlet_data[['Unit B']]
unit_b

Unnamed: 0,Unit B
0,6.7703
1,7.5093
2,6.73
3,6.7878
4,7.1522
5,6.811
6,7.2212
7,6.6606
8,7.2402
9,7.0503


In [8]:
np.mean(unit_b)

Unit B    6.964297
dtype: float64

In [9]:
from scipy.stats import ttest_ind
avg_data,pval = ttest_ind(unit_a,unit_b)
print("pval: {}.".format(pval))

pval: [0.47223947].


In [10]:
#H0: There is no significant difference in the diameter of the cutlet between two units.
#Ha: There is a significant difference in the diameter of the cutlet between two units

In [11]:
if pval<0.05:
    print('There is a significant difference in the diameter of the cutlet between two units so reject null hyphothisis')
else:
    print('There is no significant difference in the diameter of the cutlet between two units so do not reject null hyphothisis')
    print('pval: {}.'.format(pval))

There is no significant difference in the diameter of the cutlet between two units so do not reject null hyphothisis
pval: [0.47223947].


## Hospital Laboratory Data Example

In [12]:
lab_data= pd.read_csv('LabTat.csv')
lab_data

Unnamed: 0,Laboratory 1,Laboratory 2,Laboratory 3,Laboratory 4
0,185.35,165.53,176.70,166.13
1,170.49,185.91,198.45,160.79
2,192.77,194.92,201.23,185.18
3,177.33,183.00,199.61,176.42
4,193.41,169.57,204.63,152.60
...,...,...,...,...
115,178.49,170.66,193.80,172.68
116,176.08,183.98,215.25,177.64
117,202.48,174.54,203.99,170.27
118,182.40,197.18,194.52,150.87


In [13]:
lab_data.shape

(120, 4)

In [14]:
lab_data.isnull().sum()

Laboratory 1    0
Laboratory 2    0
Laboratory 3    0
Laboratory 4    0
dtype: int64

In [15]:
lab_data.describe()

Unnamed: 0,Laboratory 1,Laboratory 2,Laboratory 3,Laboratory 4
count,120.0,120.0,120.0,120.0
mean,178.361583,178.902917,199.91325,163.68275
std,13.173594,14.957114,16.539033,15.08508
min,138.3,140.55,159.69,124.06
25%,170.335,168.025,188.2325,154.05
50%,178.53,178.87,199.805,164.425
75%,186.535,189.1125,211.3325,172.8825
max,216.39,217.86,238.7,205.18


In [16]:
lab1 = lab_data[['Laboratory 1']]
lab1

Unnamed: 0,Laboratory 1
0,185.35
1,170.49
2,192.77
3,177.33
4,193.41
...,...
115,178.49
116,176.08
117,202.48
118,182.40


In [17]:
lab2 = lab_data[['Laboratory 2']]
lab2

Unnamed: 0,Laboratory 2
0,165.53
1,185.91
2,194.92
3,183.00
4,169.57
...,...
115,170.66
116,183.98
117,174.54
118,197.18


In [18]:
lab3 = lab_data[['Laboratory 3']]
lab3

Unnamed: 0,Laboratory 3
0,176.70
1,198.45
2,201.23
3,199.61
4,204.63
...,...
115,193.80
116,215.25
117,203.99
118,194.52


In [19]:
lab4 = lab_data[['Laboratory 4']]
lab4

Unnamed: 0,Laboratory 4
0,166.13
1,160.79
2,185.18
3,176.42
4,152.60
...,...
115,172.68
116,177.64
117,170.27
118,150.87


In [21]:
from scipy.stats import f_oneway
lab_num,pval=f_oneway(lab1,lab2,lab3,lab4)
print("pval:{}".format(pval))

pval:[2.11567089e-57]


In [22]:
#H0=Difference in average TAT among the laboratories are  same 
#Ha:Difference in average of TAT among the liboratories are not same

In [23]:
if pval<0.05:
    print("Difference in average of TAT among the liboratories are same.Then we reject null hyphothesis")
else:
    print("Difference in average of TAT among the liboratories are not same.Then we do not reject null hyphothesis")
print("pval:{}".format(pval))

Difference in average of TAT among the liboratories are  same.Then we reject null hyphothesis
pval:[2.11567089e-57]


## Buyer Ratio Example

In [24]:
buyer_ratio = pd.read_csv('BuyerRatio.csv')
buyer_ratio

Unnamed: 0,Observed Values,East,West,North,South
0,Males,50,142,131,70
1,Females,435,1523,1356,750


In [26]:
buyer_ratio.shape

(2, 5)

In [27]:
buyer_ratio.describe()

Unnamed: 0,East,West,North,South
count,2.0,2.0,2.0,2.0
mean,242.5,832.5,743.5,410.0
std,272.236111,976.514465,866.205807,480.832611
min,50.0,142.0,131.0,70.0
25%,146.25,487.25,437.25,240.0
50%,242.5,832.5,743.5,410.0
75%,338.75,1177.75,1049.75,580.0
max,435.0,1523.0,1356.0,750.0


In [28]:
east_west = pd.crosstab(buyer_ratio.East,buyer_ratio.West)
east_west

West,142,1523
East,Unnamed: 1_level_1,Unnamed: 2_level_1
50,1,0
435,0,1


In [29]:
chi_value,pval,df,expected_value = stats.chi2_contingency(east_west)
print('chi_val: {}.\npval: {}.\ndegrees of freedom: {}.\nExpected value: \n{}'.format(chi_value,pval,df,expected_value))

chi_val: 0.0.
pval: 1.0.
degrees of freedom: 1.
Expected value: 
[[0.5 0.5]
 [0.5 0.5]]


In [30]:
# H0 : All proportions are equal
# Ha: Not all proportions are equal
#Pval<0.05, we reject null hyphothisis

In [44]:
if pval<0.05:
    print("All proportions are equal")
else:
    print("Not all proportions are equal ")
pval

Not all proportions are equal 


1.0

In [32]:
north_south = pd.crosstab(buyer_ratio.North,buyer_ratio.South)
north_south

South,70,750
North,Unnamed: 1_level_1,Unnamed: 2_level_1
131,1,0
1356,0,1


In [33]:
chi_value,pval,df,expected_value = stats.chi2_contingency(north_south)
print('chi_val: {}.\npval: {}.\ndegrees of freedom: {}.\nExpected value: \n{}'.format(chi_value,pval,df,expected_value))

chi_val: 0.0.
pval: 1.0.
degrees of freedom: 1.
Expected value: 
[[0.5 0.5]
 [0.5 0.5]]


In [34]:
# H0 : All proportions are equal
# Ha: Not all proportions are equal
#Pval<0.05, we reject null hyphothisis

In [45]:
if pval<0.05:
    print("All proportions are equal")
else:
    print("Not all proportions are equal")
pval

Not all proportions are equal


1.0

In [36]:
east_north = pd.crosstab(buyer_ratio.East,buyer_ratio.North)
east_north

North,131,1356
East,Unnamed: 1_level_1,Unnamed: 2_level_1
50,1,0
435,0,1


In [37]:
chi_value,pval,df,expected_value = stats.chi2_contingency(east_north)
print('chi_val: {}.\npval: {}.\ndegrees of freedom: {}.\nExpected value: \n{}'.format(chi_value,pval,df,expected_value))

chi_val: 0.0.
pval: 1.0.
degrees of freedom: 1.
Expected value: 
[[0.5 0.5]
 [0.5 0.5]]


In [46]:
if pval<0.05:
    print("All proportions are equal")
else:
    print("Not all proportions are equal")
pval

Not all proportions are equal


1.0

In [39]:
east_south = pd.crosstab(buyer_ratio.East,buyer_ratio.South)
east_south

South,70,750
East,Unnamed: 1_level_1,Unnamed: 2_level_1
50,1,0
435,0,1


In [40]:
chi_value,pval,df,expected_value = stats.chi2_contingency(east_north)
print('chi_val: {}.\npval: {}.\ndegrees of freedom: {}.\nExpected value: \n{}'.format(chi_value,pval,df,expected_value))

chi_val: 0.0.
pval: 1.0.
degrees of freedom: 1.
Expected value: 
[[0.5 0.5]
 [0.5 0.5]]


In [42]:
if pval<0.05:
    print("All proportions are equal")
else:
    print("Not all proportions are equal reject")
pval

Not all proportions are equal reject


1.0

#### From all proportionality test of data we found that p>0.05 so we can reject null hypothesis and go for alternative hyphothesis

## Costomer Order Data Example

In [47]:
import seaborn as sns

In [48]:
costomer_order_data = pd.read_csv('Costomer+OrderForm.csv') # importing data from excel
costomer_order_data

Unnamed: 0,Phillippines,Indonesia,Malta,India
0,Error Free,Error Free,Defective,Error Free
1,Error Free,Error Free,Error Free,Defective
2,Error Free,Defective,Defective,Error Free
3,Error Free,Error Free,Error Free,Error Free
4,Error Free,Error Free,Defective,Error Free
...,...,...,...,...
295,Error Free,Error Free,Error Free,Error Free
296,Error Free,Error Free,Error Free,Error Free
297,Error Free,Error Free,Defective,Error Free
298,Error Free,Error Free,Error Free,Error Free


In [50]:
costomer_order_data.shape

(300, 4)

In [54]:
costomer_order_data[costomer_order_data.Phillippines == "Defective"].shape[0]#29 records from Phillippines are defective,0.09 % of total record 

29

In [55]:
costomer_order_data[costomer_order_data.Indonesia == "Defective"].shape[0]#33 records from Indonesia are defective,0.11 % of total record

33

In [56]:
# H0 = There is an defective percentage varies by centeres
# Ha = There is no defective percentage varies by centeres

In [57]:
#Data:

n1 = 29
p1 = .09

n2 = 33
p2 = .11

In [58]:
population1 = np.random.binomial(1, p1, n1)
population2 = np.random.binomial(1, p2, n2)

In [59]:
import statsmodels.api as sm
sm.stats.ttest_ind(population1, population2)

(1.387163739648832, 0.17052332951444227, 60.0)

#### Here the p value is greater than 0.05 then we do not reject null hyphothesis