In [167]:
import pandas as pd
import scipy 
import numpy as np
from scipy import stats 
import statsmodels.api as sm

# Q1 Cutlet Example

In [168]:
## H0 : Diameter between cutlets for two units is equal i.e. D1 = D2
## H1 : Diameter between cutlets for two units is not equal i.e. D1 != D2
## alpha = 0.05

In [169]:
cutlet_data = pd.read_csv('Cutlets.csv')
cutlet_data

Unnamed: 0,Unit A,Unit B
0,6.809,6.7703
1,6.4376,7.5093
2,6.9157,6.73
3,7.3012,6.7878
4,7.4488,7.1522
5,7.3871,6.811
6,6.8755,7.2212
7,7.0621,6.6606
8,6.684,7.2402
9,6.8236,7.0503


In [170]:
x1 = pd.Series(cutlet_data['Unit A'])
x2 = pd.Series(cutlet_data['Unit B'])
stats.ttest_ind(x1,x2)

Ttest_indResult(statistic=0.7228688704678063, pvalue=0.4722394724599501)

In [171]:
## pvalue > alpha value
## So we accept the null hypothesis i.e. diameters are equal.

# Q2 Lab Example

In [172]:
## H0 : Turn around time (TAT) of reports are same i.e. TAT1 = TAT2
## H1 : Turn around time (TAT) of reports are not same i.e. TAT1 != TAT2
## alpha = 0.05

In [173]:
lab_data = pd.read_csv('LabTAT.csv')
lab_data

Unnamed: 0,Laboratory 1,Laboratory 2,Laboratory 3,Laboratory 4
0,185.35,165.53,176.70,166.13
1,170.49,185.91,198.45,160.79
2,192.77,194.92,201.23,185.18
3,177.33,183.00,199.61,176.42
4,193.41,169.57,204.63,152.60
...,...,...,...,...
115,178.49,170.66,193.80,172.68
116,176.08,183.98,215.25,177.64
117,202.48,174.54,203.99,170.27
118,182.40,197.18,194.52,150.87


In [174]:
l1 = pd.Series(lab_data['Laboratory 1'])
l2 = pd.Series(lab_data['Laboratory 2'])
l3 = pd.Series(lab_data['Laboratory 3'])
l4 = pd.Series(lab_data['Laboratory 4'])
l1_l2=stats.ttest_ind(l1,l2)
print('l1_l2:', l1_l2)
l1_l3=stats.ttest_ind(l1,l3)
print('l1_l3:', l1_l3)
l1_l4=stats.ttest_ind(l1,l4)
print('l1_l4:',l1_l4)
l2_l3=stats.ttest_ind(l2,l3)
print('l2_l3:',l2_l3)
l2_l4=stats.ttest_ind(l2,l4)
print('l2_l4:',l2_l4)
l3_l4=stats.ttest_ind(l3,l4)
print('l3_l4:',l3_l4)

l1_l2: Ttest_indResult(statistic=-0.2975216812891612, pvalue=0.7663277896545946)
l1_l3: Ttest_indResult(statistic=-11.165478023532119, pvalue=1.4885264395661512e-23)
l1_l4: Ttest_indResult(statistic=8.02886395533212, pvalue=4.5201370470099075e-14)
l2_l3: Ttest_indResult(statistic=-10.321291605474421, pvalue=7.029124901932706e-21)
l2_l4: Ttest_indResult(statistic=7.848550374427221, pvalue=1.4261693708240927e-13)
l3_l4: Ttest_indResult(statistic=17.729769073850722, pvalue=2.1080551000374276e-45)


In [175]:
## From above analysis we can determine that except data for combination of Laboratary1 and Laboratary2 is different remaining are same.

# Q3 Buyer Ratio Example

In [176]:
## H0 : All proportions are equal.
## H1 : All proportions are not equal.
## alpha = 0.05

In [177]:
buyer_data = pd.read_csv('BuyerRatio.csv')
buyer_data

Unnamed: 0,Observed Values,East,West,North,South
0,Males,50,142,131,70
1,Females,435,1523,1356,750


In [178]:
east_ratio = pd.Series(buyer_data['East'])
west_ratio = pd.Series(buyer_data['West'])
north_ratio = pd.Series(buyer_data['North'])
south_ratio = pd.Series(buyer_data['South'])

In [179]:
east_west_proportion = stats.ttest_ind(east_ratio,west_ratio)
east_north_proportion = stats.ttest_ind(east_ratio,north_ratio)
east_south_proportion = stats.ttest_ind(east_ratio,south_ratio)
west_north_proportion = stats.ttest_ind(west_ratio,north_ratio)
west_south_proportion = stats.ttest_ind(west_ratio,south_ratio)
north_south_proportion = stats.ttest_ind(north_ratio,south_ratio)

In [180]:
print('East and West proportion:',east_west_proportion)
print('East and North proportion:',east_north_proportion)
print('East and South proportion:',east_south_proportion)
print('West and North proportion:',west_north_proportion)
print('West and South proportion:',west_south_proportion)
print('North and South proportion:',north_south_proportion)

East and West proportion: Ttest_indResult(statistic=-0.82306722896822, pvalue=0.4969913379002262)
East and North proportion: Ttest_indResult(statistic=-0.7803279219061698, pvalue=0.5168884062103105)
East and South proportion: Ttest_indResult(statistic=-0.42870398106115926, pvalue=0.7098969010499926)
West and North proportion: Ttest_indResult(statistic=0.09642371171406917, pvalue=0.9319760699170774)
West and South proportion: Ttest_indResult(statistic=0.548937325832513, pvalue=0.6381462260889935)
North and South proportion: Ttest_indResult(statistic=0.4760613442985683, pvalue=0.6809649039074765)


In [181]:
## From above statements we can see that all pvalues are greater than alpha value. Hence, we accept the null hypothesis i.e. all proportions are equal.

# Q4 TeleCall Example

In [182]:
## H0 : Defective percentage varies by centre.
## H1 : Defective percentage not varies by centre.
## alpha = 0.05

In [183]:
telecall_data = pd.read_csv('Costomer+OrderForm.csv')
telecall_data

Unnamed: 0,Phillippines,Indonesia,Malta,India
0,Error Free,Error Free,Defective,Error Free
1,Error Free,Error Free,Error Free,Defective
2,Error Free,Defective,Defective,Error Free
3,Error Free,Error Free,Error Free,Error Free
4,Error Free,Error Free,Defective,Error Free
...,...,...,...,...
295,Error Free,Error Free,Error Free,Error Free
296,Error Free,Error Free,Error Free,Error Free
297,Error Free,Error Free,Defective,Error Free
298,Error Free,Error Free,Error Free,Error Free


In [184]:
## To get categorical data in form of 1 and 0
Phillippines = telecall_data['Phillippines'].astype('category').cat.codes
Indonesia = telecall_data['Indonesia'].astype('category').cat.codes
Malta = telecall_data['Malta'].astype('category').cat.codes
India = telecall_data['India'].astype('category').cat.codes
telecall_data_clean["Phillippines"] = pd.Series(Phillippines)
telecall_data_clean["Indonesia"] = pd.Series(Indonesia)
telecall_data_clean["Malta"] = pd.Series(Malta)
telecall_data_clean["India"] = pd.Series(India)
telecall_data_clean
## 1 : Error Free
## 0 : Defective

Unnamed: 0,Phillippines,Indonesia,Malta,India
0,1,1,0,1
1,1,1,1,0
2,1,0,0,1
3,1,1,1,1
4,1,1,0,1
...,...,...,...,...
295,1,1,1,1
296,1,1,1,1
297,1,1,0,1
298,1,1,1,1


In [185]:
## Total count of Error free and defective forms in each center.
Phillippines_defective = pd.Series(telecall_data_clean['Phillippines'].value_counts())
Indonesiae_defective = pd.Series(telecall_data_clean['Indonesia'].value_counts())
Malta_defective = pd.Series(telecall_data_clean['Malta'].value_counts())
India_defective = pd.Series(telecall_data_clean['India'].value_counts())
status = ['Defective','Error Free']
actual_data = pd.DataFrame(columns= ['status','Phillippines_defective', 'Indonesia_defective', 'Malta_defective', 'India_defective'])
actual_data['status'] = pd.Series(status)
actual_data['Phillippines_defective'] = pd.Series(Phillippines_defective)
actual_data['Indonesia_defective'] = pd.Series(Indonesiae_defective)
actual_data['Malta_defective'] = pd.Series(Malta_defective)
actual_data['India_defective'] = pd.Series(India_defective)
actual_data

Unnamed: 0,status,Phillippines_defective,Indonesia_defective,Malta_defective,India_defective
0,Defective,29,33,31,20
1,Error Free,271,267,269,280


In [186]:
Chisquares_results=scipy.stats.chi2_contingency(count)
print('pvalue:',Chisquares_results[1])

pvalue: 0.7466420539639729


In [187]:
## From above pvalue is greater than alpha value so we go with null hypothesis i.e. Defective percentage varies by centre.