In [2]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
# Global plot styling: seaborn's dark-grid theme and a 15x10 default figure size.
sns.set_style(style='darkgrid')
plt.rcParams.update({'figure.figsize': (15, 10)})

# Q1

An F&B manager wants to determine whether there is any significant difference in the diameter of the cutlet between two units. A randomly selected sample of cutlets was collected from both units and measured. Analyze the data and draw inferences at 5% significance level. Please state the assumptions and tests that you carried out to check validity of the assumptions.


In [3]:
# Read the cutlet measurements (one column per unit); the bare name on the
# last line renders the frame as the cell output.
df = pd.read_csv(filepath_or_buffer='./Cutlets.csv')
df

Unnamed: 0,Unit A,Unit B
0,6.809,6.7703
1,6.4376,7.5093
2,6.9157,6.73
3,7.3012,6.7878
4,7.4488,7.1522
5,7.3871,6.811
6,6.8755,7.2212
7,7.0621,6.6606
8,6.684,7.2402
9,6.8236,7.0503


**Let the null hypothesis be Ho: μ1 = μ2 (i.e., there is no significant difference) <br> and the alternative hypothesis Ha: μ1 ≠ μ2 (i.e., there is a significant difference)**

* **Given Significance level (α) = 0.05**
* **α/2 = 0.025**

**Using two tail test**

In [4]:
# Significance level (5%) that the t-test p-value is compared against below.
alpha = 0.05

In [5]:
# Independent two-sample t-test on the mean cutlet diameter of Unit A vs Unit B.
#
# Assumptions of the test and how each one is checked here:
#   1. The two samples are independent   -> property of the sampling, not testable here.
#   2. Each sample is roughly normal      -> Shapiro-Wilk test per unit.
#   3. Both samples share one variance    -> Levene's test.
unit_a = df['Unit A'].to_numpy()
unit_b = df['Unit B'].to_numpy()

# Shapiro-Wilk: H0 = the sample was drawn from a normal distribution.
# A p-value above alpha means normality is not rejected for that unit.
normality_pvals = {
    'Unit A': stats.shapiro(unit_a).pvalue,
    'Unit B': stats.shapiro(unit_b).pvalue,
}

# Levene: H0 = both samples have the same variance.
levene_pval = stats.levene(unit_a, unit_b).pvalue

# Use the pooled-variance t-test only when Levene does not reject equal
# variances; otherwise fall back to Welch's t-test (equal_var=False).
# ttest_ind returns a two-sided p-value by default, matching Ha: μ1 ≠ μ2.
statscore, pval = stats.ttest_ind(unit_a, unit_b, equal_var=bool(levene_pval > alpha))
pval

0.47223947245995

In [6]:
# Decision rule for Q1: a p-value above alpha means the null hypothesis cannot
# be rejected. Output is coloured with ANSI escapes (green = fail to reject,
# red = reject); "\033[0m" resets the colour so it does not bleed into
# whatever is printed next.
if pval > alpha:
    print(f"\033[1;32m Since pval {pval} > {alpha} Failed to reject null hypothesis , there is no significant difference\033[0m")
else:
    print(f"\033[1;91m Since pval {pval} < {alpha} reject null hypothesis , there is significant difference\033[0m")

[1;32m Since pval 0.47223947245995 > 0.05 Filed to reject null hypothesis , there is no significant difference


# Q2

A hospital wants to determine whether there is any difference in the average Turn Around Time (TAT) of reports of the laboratories on their preferred list. They collected a random sample and recorded TAT for reports of 4 laboratories. TAT is defined as sample collected to report dispatch.
   
  Analyze the data and determine whether there is any difference in average TAT among the different laboratories at 5% significance level.


In [7]:
# Read the turn-around-time samples (one column per laboratory) and show them.
df2 = pd.read_csv(filepath_or_buffer='./LabTAT.csv')
df2

Unnamed: 0,Laboratory 1,Laboratory 2,Laboratory 3,Laboratory 4
0,185.35,165.53,176.70,166.13
1,170.49,185.91,198.45,160.79
2,192.77,194.92,201.23,185.18
3,177.33,183.00,199.61,176.42
4,193.41,169.57,204.63,152.60
...,...,...,...,...
115,178.49,170.66,193.80,172.68
116,176.08,183.98,215.25,177.64
117,202.48,174.54,203.99,170.27
118,182.40,197.18,194.52,150.87


**Let the null hypothesis be Ho: μ1 = μ2 = μ3 = μ4 (i.e., there is no significant difference among the columns) <br> and the alternative hypothesis Ha: at least one μi differs from the others (i.e., there is at least one column with a significant difference)**

* **Given Significance level (α) = 0.05**
* **The ANOVA F-test is right-tailed, so the p-value is compared directly with α (no α/2 split)**

**Since there are more than 2 columns (groups) to compare, we use one-way ANOVA**

In [8]:
# Significance level (5%) that the ANOVA p-value is compared against below.
alpha2 = 0.05

In [9]:
# One-way ANOVA with each laboratory column as one group; the cell displays
# only the p-value.
lab_columns = ['Laboratory 1', 'Laboratory 2', 'Laboratory 3', 'Laboratory 4']
stscr, pvalf = stats.f_oneway(*(df2[name] for name in lab_columns))
pvalf

2.1156708949992414e-57

In [10]:
# Decision rule for Q2: a p-value above alpha2 means the null hypothesis of
# equal means cannot be rejected. Output is coloured with ANSI escapes
# (green = fail to reject, red = reject); "\033[0m" resets the colour so it
# does not bleed into later output.
if pvalf > alpha2:
    print(f"\033[1;32m Since pval {pvalf} > {alpha2} Failed to reject null hypothesis , there is no significant difference amongst columns\033[0m")
else:
    print(f"\033[1;91m Since pval {pvalf} < {alpha2} reject null hypothesis , there is atleast one column with significant difference\033[0m")

[1;91m Since pval 2.1156708949992414e-57 < 0.05 reject null hypothesis , there is atleast one column with significant difference


# Q3

In [11]:
# Read the male/female buyer counts per region and show the raw table.
obs = pd.read_csv(filepath_or_buffer='./BuyerRatio.csv')
obs

Unnamed: 0,Observed Values,East,West,North,South
0,Males,50,142,131,70
1,Females,435,1523,1356,750


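**Let the null hypothesis be Ho: p1 = p2 = p3 = p4 (i.e., the male-to-female buyer proportions are the same in all four regions) <br> and the alternative hypothesis Ha: at least one pi differs (i.e., the proportions are not the same in every region)**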

In [12]:
# Significance level (5%) that the chi-square p-value is compared against below.
alpha3 = 0.05

In [13]:
# Drop the first column (the row labels) so that only the per-region counts
# remain for the contingency test.
region_columns = obs.columns[1:]
obs_table = obs.loc[:, region_columns]
obs_table

Unnamed: 0,East,West,North,South
0,50,142,131,70
1,435,1523,1356,750


In [14]:
# Chi-square test of independence on the gender-by-region count table.
# The result unpacks into: test statistic, p-value, degrees of freedom and
# the table of expected frequencies. Only the p-value is displayed.
chi, pvalchi, dof, exp = stats.chi2_contingency(observed=obs_table)
pvalchi

0.6603094907091882

In [15]:
# Decision rule for Q3: a p-value above alpha3 means the null hypothesis of
# equal proportions cannot be rejected. Output is coloured with ANSI escapes
# (green = fail to reject, red = reject); "\033[0m" resets the colour so it
# does not bleed into later output.
if pvalchi > alpha3:
    print(f"\033[1;32m Since pval {pvalchi} > {alpha3} Failed to reject null hypothesis , all proportions are equal\033[0m")
else:
    print(f"\033[1;91m Since pval {pvalchi} < {alpha3} reject null hypothesis , proportions are not equal\033[0m")

[1;32m Since pval 0.6603094907091882 > 0.05 Filed to reject null hypothesis , all proportions are equal


# Q4

TeleCall uses 4 centers around the globe to process customer order forms. They audit a certain %  of the customer order forms. Any error in order form renders it defective and has to be reworked before processing.  The manager wants to check whether the defective %  varies by centre. Please analyze the data at 5% significance level and help the manager draw appropriate inferences


In [16]:
# Read the audited order-form outcomes (one column per centre) and show them.
df3 = pd.read_csv(filepath_or_buffer='./Costomer+OrderForm.csv')
df3

Unnamed: 0,Phillippines,Indonesia,Malta,India
0,Error Free,Error Free,Defective,Error Free
1,Error Free,Error Free,Error Free,Defective
2,Error Free,Defective,Defective,Error Free
3,Error Free,Error Free,Error Free,Error Free
4,Error Free,Error Free,Defective,Error Free
...,...,...,...,...
295,Error Free,Error Free,Error Free,Error Free
296,Error Free,Error Free,Error Free,Error Free
297,Error Free,Error Free,Defective,Error Free
298,Error Free,Error Free,Error Free,Error Free


**Let the null hypothesis be Ho: p1 = p2 = p3 = p4 (i.e., the defective % does not vary by centre) <br> and the alternative hypothesis Ha: at least one pi differs (i.e., the defective % varies by centre)**

In [17]:
# Significance level (5%) that the chi-square p-value is compared against below.
alpha4 = 0.05

In [18]:
# Empty outcome-by-centre table (rows: outcome, columns: centre) to be filled
# with the observed counts.
obs_frq = pd.DataFrame(columns=df3.columns, index=['Defective','Error Free'])

In [19]:
# Build the observed-frequency table: rows are the two outcomes, columns are
# the centres, values are how often each outcome occurs in that centre.
#
# Counts are looked up by LABEL, not by position. value_counts() orders its
# result by frequency, so taking entries [0] and [1] would silently swap the
# "Defective" and "Error Free" rows for any centre where defects are the more
# common outcome. Integer-position lookups on a string-labelled Series are
# also deprecated in recent pandas.
outcome_labels = ['Defective', 'Error Free']
obs_frq = (
    df3.apply(lambda centre: centre.value_counts())
       .reindex(outcome_labels)  # fixed row order; an absent outcome becomes NaN
       .fillna(0)                # ...which means that outcome was observed 0 times
       .astype(int)              # numeric dtype for the chi-square test
)
obs_frq

Unnamed: 0,Phillippines,Indonesia,Malta,India
Defective,29,33,31,20
Error Free,271,267,269,280


In [20]:
# Chi-square test of independence on the outcome-by-centre count table.
# obs_frq may be an object-dtype frame (it is created empty and filled cell
# by cell), so cast it to integers to hand SciPy a genuinely numeric table.
# The result unpacks into: test statistic, p-value, degrees of freedom and
# the table of expected frequencies. Only the p-value is displayed.
chi, pvalch2, dof, exp = stats.chi2_contingency(obs_frq.astype(int))
pvalch2

0.2771020991233135

In [21]:
# Decision rule for Q4: a p-value above alpha4 means the null hypothesis
# (defective % is the same in every centre) cannot be rejected. Output is
# coloured with ANSI escapes (green = fail to reject, red = reject);
# "\033[0m" resets the colour so it does not bleed into later output.
if pvalch2 > alpha4:
    print(f"\033[1;32m Since pval {pvalch2} > {alpha4} Failed to reject null hypothesis , defective % does not vary by centre\033[0m")
else:
    print(f"\033[1;91m Since pval {pvalch2} < {alpha4} reject null hypothesis , varies by centre\033[0m")

[1;32m Since pval 0.2771020991233135 > 0.05 Filed to reject null hypothesis , defective % does not varies by centre
