# Importing Necessary Libraries

In [2]:
import pandas as pd
from scipy import stats
import numpy as np
from statsmodels.stats.proportion import proportions_ztest

# Problem - 1
    An F&B manager wants to determine whether there is any significant difference in the diameter of the cutlet between two units. A randomly selected sample of cutlets was collected from both units and measured. Analyze the data and draw inferences at the 5% significance level. Please state the assumptions and the tests that you carried out to check the validity of the assumptions.

### Step 1 - Data Importing, Understanding and Cleaning

In [3]:
# Load the cutlet measurements for both units and preview the first five rows.
cutlets_data = pd.read_csv(filepath_or_buffer='Cutlets.csv')
cutlets_data.head(n=5)

Unnamed: 0,Unit A,Unit B
0,6.809,6.7703
1,6.4376,7.5093
2,6.9157,6.73
3,7.3012,6.7878
4,7.4488,7.1522


In [4]:
# Print the column dtypes and non-null counts of the loaded frame.
cutlets_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35 entries, 0 to 34
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Unit A  35 non-null     float64
 1   Unit B  35 non-null     float64
dtypes: float64(2)
memory usage: 688.0 bytes


### Step 2 - Hypothesis Formulation   
    H0 : µ-A = µ-B (There is no significant difference in the diameter of the cutlet between the two units.)
    Ha : µ-A ≠ µ-B (There is a significant difference in the diameter of the cutlet between the two units.)

### Step 3 - Gathering Evidence

In [5]:
# The problem statement asks for the assumptions of the test to be stated and
# checked. The independent two-sample t-test assumes that each sample is
# approximately normal and, in its pooled (Student) form, that both groups
# share a common variance.
unit_a = cutlets_data['Unit A']
unit_b = cutlets_data['Unit B']

# Shapiro-Wilk normality check (H0: the sample comes from a normal distribution).
shapiro_stat_a, shapiro_p_a = stats.shapiro(unit_a)
shapiro_stat_b, shapiro_p_b = stats.shapiro(unit_b)

# Levene test for homogeneity of variance (H0: both units have equal variances).
levene_stat, levene_p = stats.levene(unit_a, unit_b)

print(f'Shapiro-Wilk p-value (Unit A): {shapiro_p_a:.4f}')
print(f'Shapiro-Wilk p-value (Unit B): {shapiro_p_b:.4f}')
print(f'Levene p-value (equal variances): {levene_p:.4f}')

# Use the pooled-variance t-test only when Levene does not reject equal
# variances at the 5% level; otherwise fall back to Welch's t-test.
equal_variances = bool(levene_p >= 0.05)
test_stat1, p_value1 = stats.ttest_ind(unit_a, unit_b, equal_var=equal_variances)

In [6]:
# Display the p-value returned by the two-sample t-test.
p_value1

0.4722394724599501

### Step 4 - Checking the Sufficiency of Evidence with Significance Value

In [7]:
# Decision rule at the 5% significance level: reject H0 only when the
# p-value is strictly below 0.05.
print('Reject Null Hypothesis(H0)' if p_value1 < 0.05 else 'Do Not Reject Null Hypothesis(H0)')

Do Not Reject Null Hypothesis(H0)


# ----------------------------------------------------------------------------------------------------------

# Problem - 2
    A hospital wants to determine whether there is any difference in the average Turn Around Time (TAT) of reports of the laboratories on their preferred list. They collected a random sample and recorded TAT for reports of 4 laboratories. TAT is defined as the time from sample collection to report dispatch.
    Analyze the data and determine whether there is any difference in average TAT among the different laboratories at 5% significance level.

### Step 1 - Data Importing, Understanding and Cleaning

In [8]:
# Load the turn-around-time samples for the four laboratories and preview
# the first five rows.
lab_tat_data = pd.read_csv(filepath_or_buffer='LabTAT.csv')
lab_tat_data.head(n=5)

Unnamed: 0,Laboratory 1,Laboratory 2,Laboratory 3,Laboratory 4
0,185.35,165.53,176.7,166.13
1,170.49,185.91,198.45,160.79
2,192.77,194.92,201.23,185.18
3,177.33,183.0,199.61,176.42
4,193.41,169.57,204.63,152.6


In [9]:
# Print the column dtypes and non-null counts of the loaded frame.
lab_tat_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Laboratory 1  120 non-null    float64
 1   Laboratory 2  120 non-null    float64
 2   Laboratory 3  120 non-null    float64
 3   Laboratory 4  120 non-null    float64
dtypes: float64(4)
memory usage: 3.9 KB


### Step 2 - Hypothesis Formulation     
    H0 : µ-1 = µ-2 = µ-3 = µ-4  (There is no difference in average TAT among the different laboratories.) 
    Ha : Not all of µ-1, µ-2, µ-3, µ-4 are equal  (At least one laboratory's average TAT differs from the others.)

### Step 3 - Gathering Evidence

In [10]:
# One-way ANOVA across the four laboratory columns; each column is passed
# to f_oneway as a separate sample, in the same order as before.
lab_columns = ['Laboratory 1', 'Laboratory 2', 'Laboratory 3', 'Laboratory 4']
lab_samples = [lab_tat_data[column] for column in lab_columns]
test_stat2, p_value2 = stats.f_oneway(*lab_samples)

In [11]:
# Display the p-value returned by the one-way ANOVA.
p_value2

2.1156708949992414e-57

### Step 4 - Checking the Sufficiency of Evidence with Significance Value

In [12]:
# Decision rule at the 5% significance level: reject H0 only when the
# p-value is strictly below 0.05.
print('Reject Null Hypothesis(H0)' if p_value2 < 0.05 else 'Do Not Reject Null Hypothesis(H0)')

Reject Null Hypothesis(H0)


# ----------------------------------------------------------------------------------------------------------

# Problem - 3
    Sales of products in four different regions are tabulated for males and females. Find if male-female buyer ratios are similar across regions.

### Step 1 - Data Importing, Understanding and Cleaning

In [13]:
# Load the observed buyer counts (one row per gender, one column per region)
# and display the whole table, which has only two rows.
buyer_ratio_data = pd.read_csv(filepath_or_buffer='BuyerRatio.csv')
buyer_ratio_data

Unnamed: 0,Observed Values,East,West,North,South
0,Males,50,142,131,70
1,Females,435,1523,1356,750


In [14]:
# Keep only the numeric region counts for the chi-square test.
# Reassigning (instead of inplace=True) together with errors='ignore' makes
# this cell safe to re-run: the original in-place drop raised KeyError on a
# second execution because the column had already been removed.
buyer_ratio_data = buyer_ratio_data.drop(columns='Observed Values', errors='ignore')

### Step 2 - Hypothesis Formulation    
    H0 : p1 = p2 = p3 = p4  (All Proportions are Equal.) 
    Ha : Not all of p1, p2, p3, p4 are equal  (Not All Proportions are Equal.)

In [15]:
# Chi-square test of independence on the gender-by-region count table.
# The result unpacks to: test statistic, p-value, degrees of freedom and
# the table of expected frequencies.
chi2_result3 = stats.chi2_contingency(buyer_ratio_data)
test_stat3, p_value3, df3, expected3 = chi2_result3

### Step 3 - Gathering Evidence

In [16]:
# Display the p-value returned by the chi-square test.
p_value3

0.6603094907091882

### Step 4 - Checking the Sufficiency of Evidence with Significance Value

In [17]:
# Decision rule at the 5% significance level: reject H0 only when the
# p-value is strictly below 0.05.
print('Reject Null Hypothesis(H0)' if p_value3 < 0.05 else 'Do Not Reject Null Hypothesis(H0)')

Do Not Reject Null Hypothesis(H0)


# -----------------------------------------------------------------------------------------------------------

# Problem - 4
    TeleCall uses 4 centers around the globe to process customer order forms. They audit a certain % of the customer order forms. Any error in an order form renders it defective, and it has to be reworked before processing. The manager wants to check whether the defective % varies by centre. Please analyze the data at the 5% significance level and help the manager draw appropriate inferences.

### Step 1 - Data Importing, Understanding and Cleaning

In [18]:
# Load the audited order-form outcomes (one column per centre).
orderform_data = pd.read_csv(filepath_or_buffer='Costomer+OrderForm.csv')

In [19]:
# Preview the first five audited order forms.
orderform_data.head(n=5)

Unnamed: 0,Phillippines,Indonesia,Malta,India
0,Error Free,Error Free,Defective,Error Free
1,Error Free,Error Free,Error Free,Defective
2,Error Free,Defective,Defective,Error Free
3,Error Free,Error Free,Error Free,Error Free
4,Error Free,Error Free,Defective,Error Free


In [20]:
# Print the column dtypes and non-null counts of the loaded frame.
orderform_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Phillippines  300 non-null    object
 1   Indonesia     300 non-null    object
 2   Malta         300 non-null    object
 3   India         300 non-null    object
dtypes: object(4)
memory usage: 9.5+ KB


### Step 2 - Hypothesis Formulation     
    H0 : p1 = p2 = p3 = p4  (The Defective % does not vary by Centre.)
    Ha : Not all of p1, p2, p3, p4 are equal  (The Defective % varies by Centre.)

In [21]:
# Build the status-by-centre contingency table from the raw labels.
# Counting each centre column and aligning the results on the label index
# keeps every count on the row it belongs to; a status that never occurs at
# some centre becomes 0 instead of NaN, so the chi-square test stays valid.
centre_columns = ['Phillippines', 'Indonesia', 'Malta', 'India']
status_counts = (
    orderform_data[centre_columns]
    .apply(pd.Series.value_counts)
    .fillna(0)
    .astype('int64')
)

# Keep the 'Observed_Values' label column that later cells expect, but derive
# it from the index. The previous hand-typed list was matched to the rows by
# position only, so it could mislabel them if the count order ever changed.
orderform_data_2 = status_counts.copy()
orderform_data_2.insert(0, 'Observed_Values', orderform_data_2.index)

In [22]:
# Display the status-by-centre count table built in the previous cell.
orderform_data_2

Unnamed: 0,Observed_Values,Phillippines,Indonesia,Malta,India
Error Free,Error Free,271,267,269,280
Defective,Defective,29,33,31,20


In [23]:
# Keep only the numeric centre counts for the chi-square test.
# Reassigning (instead of inplace=True) together with errors='ignore' makes
# this cell safe to re-run: the original in-place drop raised KeyError on a
# second execution because the column had already been removed.
orderform_data_2 = orderform_data_2.drop(columns='Observed_Values', errors='ignore')

### Step 3 - Gathering Evidence

In [24]:
# Chi-square test of independence on the status-by-centre count table.
# The result unpacks to: test statistic, p-value, degrees of freedom and
# the table of expected frequencies.
chi2_result4 = stats.chi2_contingency(orderform_data_2)
test_stat4, p_value4, df4, expected4 = chi2_result4

In [25]:
# Display the p-value returned by the chi-square test.
p_value4

0.2771020991233135

### Step 4 - Checking the Sufficiency of Evidence with Significance Value

In [26]:
# Decision rule at the 5% significance level: reject H0 only when the
# p-value is strictly below 0.05.
print('Reject Null Hypothesis(H0)' if p_value4 < 0.05 else 'Do Not Reject Null Hypothesis(H0)')

Do Not Reject Null Hypothesis(H0)
