In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from scipy.stats import chisquare # Statistical test (chistat, pvalue)
from scipy.stats import chi2
from scipy.stats import chi2_contingency

In [9]:
# Shipping dataset hosted on Google Drive; read_csv fetches it over the network.
DATA_URL = "https://drive.google.com/uc?id=19408kW52zqfWoQi9hLNd9gRxqH9zCAk7"
df = pd.read_csv(DATA_URL)

# Preview the first five rows (head() defaults to n=5).
df.head()


Unnamed: 0,ID,Warehouse_block,Mode_of_Shipment,Customer_care_calls,Customer_rating,Customer_satisfaction,Cost_of_the_Product,Prior_purchases,Product_importance,Gender,Discount_offered,Weight_in_gms,weight_in_kgs,Reached.on.Time_Y.N,Delivery_output
0,1,D,Flight,4,2,Unsatisfied,177,3,low,Female,44,1233,1.233,1,Not On Time
1,2,F,Flight,4,5,Highly Satisfied,216,2,low,Male,59,3088,3.088,1,Not On Time
2,3,A,Flight,2,2,Unsatisfied,183,4,low,Male,48,3374,3.374,1,Not On Time
3,4,B,Flight,3,3,Neutral,176,4,medium,Male,10,1177,1.177,1,Not On Time
4,5,C,Flight,2,2,Unsatisfied,184,3,medium,Female,46,2484,2.484,1,Not On Time


# <font color='purple'>**FINER QUESTION 1.1**</font>

**Does the mode of shipment impact customer satisfaction?**

## <font color='purple'>**STEP 1 : Framing Hypothesis**</font>



**Null Hypothesis (H0):** The mode of shipment does not impact customer satisfaction.

**Alternative Hypothesis (H1):** The mode of shipment impacts customer satisfaction.

In [3]:
# Cross-tabulate observed counts: satisfaction level (rows) by shipment mode (columns).
contingency_table = pd.crosstab(
    index=df['Customer_satisfaction'],
    columns=df['Mode_of_Shipment'],
)

In [4]:
# Display the observed counts that feed the chi-square test.
contingency_table

Mode_of_Shipment,Flight,Road,Ship
Customer_satisfaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Highly Satisfied,353,352,1466
Highly Unsatisfied,369,361,1505
Neutral,330,343,1566
Satisfied,363,357,1469
Unsatisfied,362,347,1456



We perform a chi-square test of independence and calculate the p-value.

In [5]:
# Chi-square test of independence on the satisfaction x shipment-mode table.
# chi2_contingency returns (statistic, p-value, degrees of freedom, expected counts).
#
# NOTE: the degrees of freedom are stored in `dof`, not `df`. Unpacking into
# `df` would overwrite the DataFrame loaded by read_csv, so every later cell
# that indexes `df[...]` would fail unless the load cell were re-run by hand.
chi_stat, p_value, dof, exp_freq = chi2_contingency(contingency_table)

print("chi_stat:", chi_stat)
print("p_value:", p_value)
print("df:", dof)  # label stays "df" (degrees of freedom) so the printed output is unchanged
print("exp_freq:", exp_freq)

chi_stat: 6.375589990774623
p_value: 0.6052377702533441
df: 8
exp_freq: [[ 350.74706792  347.39158105 1472.86135103]
 [ 361.08691699  357.63251205 1516.28057096]
 [ 361.73315756  358.27257023 1518.99427221]
 [ 353.65515047  350.27184289 1485.07300664]
 [ 349.77770706  346.43149377 1468.79079916]]


In [6]:
# Significance level for the test decision.
alpha = 0.05

# Pick the (decision, interpretation) pair from the chi-square p-value,
# then print one message per line.
verdict = (
    ("Reject H0", "The mode of shipment impacts customer satisfaction.")
    if p_value < alpha
    else ("Fail to reject H0", "The mode of shipment does not impact customer satisfaction.")
)
print(*verdict, sep="\n")



Fail to reject H0
The mode of shipment does not impact customer satisfaction.


# FINER QUESTION 1.2

**Does the delivery delay impact customer satisfaction?**

Null Hypothesis (H0): Delivery delay does not impact customer satisfaction.

Alternative Hypothesis (H1): Delivery delay impacts customer satisfaction.

In [10]:
# Preview the first five rows of the DataFrame (head() defaults to n=5).
df.head()

Unnamed: 0,ID,Warehouse_block,Mode_of_Shipment,Customer_care_calls,Customer_rating,Customer_satisfaction,Cost_of_the_Product,Prior_purchases,Product_importance,Gender,Discount_offered,Weight_in_gms,weight_in_kgs,Reached.on.Time_Y.N,Delivery_output
0,1,D,Flight,4,2,Unsatisfied,177,3,low,Female,44,1233,1.233,1,Not On Time
1,2,F,Flight,4,5,Highly Satisfied,216,2,low,Male,59,3088,3.088,1,Not On Time
2,3,A,Flight,2,2,Unsatisfied,183,4,low,Male,48,3374,3.374,1,Not On Time
3,4,B,Flight,3,3,Neutral,176,4,medium,Male,10,1177,1.177,1,Not On Time
4,5,C,Flight,2,2,Unsatisfied,184,3,medium,Female,46,2484,2.484,1,Not On Time


In [11]:
# Cross-tabulate observed counts: delivery outcome (rows) by satisfaction level (columns).
contingency_table1 = pd.crosstab(
    index=df['Delivery_output'],
    columns=df['Customer_satisfaction'],
)
contingency_table1

Customer_satisfaction,Highly Satisfied,Highly Unsatisfied,Neutral,Satisfied,Unsatisfied
Delivery_output,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Not On Time,1317,1313,1357,1303,1273
On Time,854,922,882,886,892


In [12]:
# Chi-square test of independence on the delivery-outcome x satisfaction table.
# The result unpacks as (statistic, p-value, degrees of freedom, expected counts).
chi_stat1, p_value1, df1, exp_freq1 = chi2_contingency(observed=contingency_table1)

# Report each quantity on its own line.
for label, value in (
    ("chi_stat:", chi_stat1),
    ("p_value:", p_value1),
    ("df:", df1),
    ("exp_freq:", exp_freq1),
):
    print(label, value)

chi_stat: 3.2000454748311453
p_value: 0.5249236018493662
df: 4
exp_freq: [[1295.41531048 1333.60350941 1335.99027184 1306.15574143 1291.83516683]
 [ 875.58468952  901.39649059  903.00972816  882.84425857  873.16483317]]


In [13]:
# Significance level for the test decision.
alpha = 0.05

# Pick the (decision, interpretation) pair from the chi-square p-value,
# then print one message per line.
verdict = (
    ("Reject H0", "Delivery delay impacts customer satisfaction.")
    if p_value1 < alpha
    else ("Fail to reject H0", "Delivery delay does not impact customer satisfaction.")
)
print(*verdict, sep="\n")



Fail to reject H0
Delivery delay does not impact customer satisfaction.


# FINER QUESTION 3.1


**3.1 Is there a significant difference in delivery times between warehouse blocks?**

Null Hypothesis (H0): There is no significant difference in delivery times between warehouse blocks.

Alternative Hypothesis (H1): There is a significant difference in delivery times between warehouse blocks.



In [15]:
from scipy.stats import f_oneway

# Build one sample of 'Reached.on.Time_Y.N' values per warehouse block,
# in the order the blocks first appear in the data.
# NOTE(review): the data preview suggests 'Reached.on.Time_Y.N' is a 0/1 flag
# rather than a measured delivery time; a chi-square test on a contingency
# table may be the more appropriate test here -- TODO confirm.
block_labels = df['Warehouse_block'].unique()
grouped_data = [
    df.loc[df['Warehouse_block'] == label, 'Reached.on.Time_Y.N']
    for label in block_labels
]

# One-way ANOVA across the per-block samples.
f_statistic, p_value = f_oneway(*grouped_data)

# Report the test statistic and its p-value.
print("F-statistic:", f_statistic)
print("p-value:", p_value)


F-statistic: 0.27224986723077316
p-value: 0.896006487864416


In [16]:
# Significance level for the test decision.
alpha = 0.05

# Decide using the ANOVA p-value (`p_value`) computed for the warehouse-block
# comparison. The previous code compared `p_value1`, which is the chi-square
# p-value from question 1.2, so the printed conclusion did not reflect this test.
if p_value < alpha:
    print("Reject H0")
    print("There is a significant difference in delivery times between warehouse blocks.")
else:
    print("Fail to reject H0")
    print("There is no significant difference in delivery times between warehouse blocks.")



Fail to reject H0
There is no significant difference in delivery times between warehouse blocks.


# FINER QUESTION 3.2

**3.2 Which shipment mode delivers more on-time deliveries?**

Null Hypothesis (H0): There is no significant difference in on-time delivery rates between shipment modes.

Alternative Hypothesis (H1): There is a significant difference in on-time delivery rates between shipment modes.

In [17]:
# Build one sample of 'Reached.on.Time_Y.N' values per shipment mode,
# in the order the modes first appear in the data.
shipment_modes = df['Mode_of_Shipment'].unique()
grouped_data1 = [
    df.loc[df['Mode_of_Shipment'] == mode, 'Reached.on.Time_Y.N']
    for mode in shipment_modes
]

# One-way ANOVA across the per-mode samples.
f_statistic, p_value = f_oneway(*grouped_data1)

# Report the test statistic and its p-value.
print("F-statistic:", f_statistic)
print("p-value:", p_value)

F-statistic: 0.37164159522543116
p-value: 0.6896100162003029


In [18]:
# Significance level for the test decision.
alpha = 0.05

# Decide using the ANOVA p-value (`p_value`) computed for the shipment-mode
# comparison. The previous code compared `p_value1`, which is the chi-square
# p-value from question 1.2, so the printed conclusion did not reflect this test.
if p_value < alpha:
    print("Reject H0")
    print("There is a significant difference in on-time delivery rates between shipment modes.")
else:
    print("Fail to reject H0")
    print("There is no significant difference in on-time delivery rates between shipment modes.")


Fail to reject H0
There is no significant difference in on-time delivery rates between shipment modes.
