In [12]:
import pandas as pd
from scipy import stats
import numpy as np

# Load the raw order export.
df = pd.read_csv("food_orders_new_delhi.csv")

# Parse both timestamp columns into datetimes (they are subtracted from each
# other further down to obtain delivery durations).
for timestamp_col in ('Order Date and Time', 'Delivery Date and Time'):
    df[timestamp_col] = pd.to_datetime(df[timestamp_col])

# 2. Clean data
# Standardize discounts: keep only the first number found in each label.
# Extracting one number (with an optional decimal part) avoids the pitfalls of
# deleting every non-digit character, which would drop decimal points
# ("12.5%" -> 125) and glue separate digit groups together.
# Casting to str first lets the cell be re-run on an already-numeric column.
# Labels without any number, and missing labels, become 0.
# NOTE(review): a flat-amount label and a percentage label both end up as a
# bare number here - confirm that downstream code does not mix the two units.
discount_number = (
    df['Discounts and Offers']
    .astype(str)
    .str.extract(r'(\d+(?:\.\d+)?)', expand=False)
)
df['Discounts and Offers'] = pd.to_numeric(discount_number).fillna(0.0).astype(float)

In [14]:
# 3. Most common payment method
# Tally orders per payment method, pick the most frequent one, and express
# its count as a share of all rows in the frame.
payment_counts = df['Payment Method'].value_counts()
most_common = payment_counts.idxmax()
proportion = payment_counts.loc[most_common] / df.shape[0]
print(f"3. Most common method: {most_common} ({proportion:.1%})")

3. Most common method: Cash on Delivery (35.7%)


In [16]:
# 4. 95% CI for proportion
# Normal-approximation interval: p +/- 1.96 * sqrt(p * (1 - p) / n).
n_rows = len(df)
se = np.sqrt((proportion * (1 - proportion)) / n_rows)
half_width = 1.96 * se
ci = (proportion - half_width, proportion + half_width)
lower, upper = ci
print(f"4. 95% CI: ({lower:.3f}, {upper:.3f})")

4. 95% CI: (0.327, 0.387)


In [18]:
# 5. Commission fee stats
# Mean is shown with two decimals; the median is printed as returned.
commission_fee = df['Commission Fee']
fee_mean = commission_fee.mean()
fee_median = commission_fee.median()
print(f"5. Commission - Mean: {fee_mean:.2f}, Median: {fee_median}")

5. Commission - Mean: 126.99, Median: 127.0


In [20]:
# 6. Average order value
# Arithmetic mean of the 'Order Value' column over all orders.
avg_order_value = df['Order Value'].mean()
print(f"6. Avg Order Value: {avg_order_value:.2f}")

6. Avg Order Value: 1053.97


In [22]:
# 7. Commission probabilities
# Empirical probabilities: the fraction of all orders whose commission fee is
#   i)   strictly above 120,
#   ii)  strictly below 143,
#   iii) between 86 and 133, both ends included.
total = len(df)
fee = df['Commission Fee']
above_120 = fee > 120
below_143 = fee < 143
within_86_133 = (fee >= 86) & (fee <= 133)
# Counting the True entries of a mask equals the length of the filtered frame.
p1 = int(above_120.sum()) / total
p2 = int(below_143.sum()) / total
p3 = int(within_86_133.sum()) / total
print(f"7. Probabilities - i) {p1:.1%}, ii) {p2:.1%}, iii) {p3:.1%}")

7. Probabilities - i) 55.1%, ii) 60.5%, iii) 32.2%


In [24]:
# 8. Delivery time for Credit Card
# Restrict to credit-card orders, then measure order -> delivery in minutes.
is_credit_card = df['Payment Method'] == 'Credit Card'
cc_orders = df[is_credit_card]
elapsed = cc_orders['Delivery Date and Time'] - cc_orders['Order Date and Time']
delivery_time = elapsed.dt.total_seconds() / 60  # seconds -> minutes
print(f"8. Avg delivery time (CC): {delivery_time.mean():.1f} minutes")


8. Avg delivery time (CC): 74.0 minutes


In [26]:
# 9. Lowest of top 10% Cash orders
# Rank cash orders from largest to smallest order value, keep the leading
# tenth (row count truncated toward zero), and report its smallest value.
is_cash = df['Payment Method'] == 'Cash on Delivery'
cash_orders = df[is_cash].sort_values('Order Value', ascending=False)
top_count = int(len(cash_orders) * 0.1)
top_10 = cash_orders.iloc[:top_count]
print(f"9. Lowest in top 10% Cash: {top_10['Order Value'].min()}")

# 10. Highest of bottom 60% Digital
# Rank digital-wallet orders from smallest to largest order value, keep the
# leading 60% (row count truncated toward zero), and report its largest value.
is_digital = df['Payment Method'] == 'Digital Wallet'
digital_orders = df[is_digital].sort_values('Order Value')
bottom_count = int(len(digital_orders) * 0.6)
bottom_60 = digital_orders.iloc[:bottom_count]
print(f"10. Highest in bottom 60% Digital: {bottom_60['Order Value'].max()}")

# 11. Mean difference between Digital vs Cash
# Compare the mean order value of digital-wallet orders with that of
# cash-on-delivery orders.
digital_mean = df.loc[df['Payment Method'] == 'Digital Wallet', 'Order Value'].mean()
cash_mean = df.loc[df['Payment Method'] == 'Cash on Delivery', 'Order Value'].mean()

# Report the magnitude and the direction separately: printing the signed
# difference followed by a fixed "higher" produced self-contradictory text
# such as "-10.0 higher" whenever the digital mean was the smaller one.
mean_gap = digital_mean - cash_mean
direction = 'higher' if mean_gap >= 0 else 'lower'
print(f"11. Digital Wallet is ₹{abs(mean_gap):.1f} {direction}")

# 12. Hypothesis test (μ >53)
# One-sided, one-sample t-test of H0: mean delivery time = 53 minutes against
# H1: mean delivery time > 53 minutes, evaluated at the 5% level.
delivery_times = (df['Delivery Date and Time'] - df['Order Date and Time']).dt.total_seconds()/60
# nan_policy='omit' ignores orders with a missing timestamp. With the default
# policy a single NaN makes the p-value NaN, and `p_val < 0.05` would then
# silently print "Fail to reject" instead of a real decision.
t_stat, p_val = stats.ttest_1samp(delivery_times, 53, alternative='greater', nan_policy='omit')
print(f"12. {'Reject' if p_val < 0.05 else 'Fail to reject'} H₀ (p={p_val:.4f})")

# 13. IQR of delivery times
# First and third quartiles bound the middle 50% of delivery durations.
# nanpercentile skips missing durations; plain np.percentile would return NaN
# for both quartiles as soon as one delivery time is missing.
q1, q3 = np.nanpercentile(delivery_times, [25, 75])
print(f"13. Middle 50% range: {q1:.0f}-{q3:.0f} minutes")

# 14. Payment methods with refunds
# Keep only orders with a positive refund/chargeback amount, then show how
# those orders split across payment methods (shares sum to 1).
has_refund = df['Refunds/Chargebacks'] > 0
refund_df = df[has_refund]
refund_payment_methods = refund_df['Payment Method']
refund_dist = refund_payment_methods.value_counts(normalize=True)
print("14. Refund distribution:")
print(refund_dist)

9. Lowest in top 10% Cash: 1810
10. Highest in bottom 60% Digital: 1186
11. Digital Wallet is ₹-10.0 higher
12. Reject H₀ (p=0.0000)
13. Middle 50% range: 50-96 minutes
14. Refund distribution:
Payment Method
Credit Card         0.357895
Cash on Delivery    0.354386
Digital Wallet      0.287719
Name: proportion, dtype: float64
