In [1]:
# Analyze inefficiencies in the delivery process, focusing on the preparation time, the courier waiting time, and the perspectives of the different stakeholders. We start with the preparation time for orders.
# This cell loads the order data, calculates preparation times, visualizes their distribution, and identifies the restaurants with the longest average preparation times.

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the raw order and order-stage tables.
orders_df = pd.read_csv('orders.csv')
order_stages_df = pd.read_csv('order_stages.csv')

# Parse the timestamp columns that the analysis below subtracts from each other.
orders_df['order_promised_delivery'] = pd.to_datetime(orders_df['order_promised_delivery'])
orders_df['restaurant_finished_preparation'] = pd.to_datetime(orders_df['restaurant_finished_preparation'])
order_stages_df['order_stage_start'] = pd.to_datetime(order_stages_df['order_stage_start'])

# "Preparation time" in minutes, computed as
# restaurant_finished_preparation - order_promised_delivery.
# NOTE(review): this measures how long after the promised delivery time the
# restaurant finished, not time spent in the kitchen. If the data has an
# order-placed / preparation-start timestamp, that is presumably the intended
# baseline -- confirm against the dataset schema.
orders_df['preparation_time'] = (
    orders_df['restaurant_finished_preparation'] - orders_df['order_promised_delivery']
).dt.total_seconds() / 60

# Keep only non-negative values. Rows with a missing timestamp (NaN result)
# also fail this comparison and are dropped.
orders_df = orders_df[orders_df['preparation_time'] >= 0]

# Distribution of preparation time.
fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(orders_df['preparation_time'], bins=50, kde=True, ax=ax)
ax.set_title('Distribution of Order Preparation Time')
ax.set_xlabel('Preparation Time (minutes)')
ax.set_ylabel('Frequency')
fig.savefig('preparation_time_distribution.png')
plt.close(fig)

# Summary statistics.
prep_time_stats = orders_df['preparation_time'].describe()
print("Preparation Time Statistics (in minutes):")
print(prep_time_stats)

# Share of orders with a long preparation time (> 30 minutes).
long_prep_orders = orders_df[orders_df['preparation_time'] > 30]
# Guard against an empty frame (e.g. every row filtered out above) so the
# cell reports NaN instead of raising ZeroDivisionError.
long_prep_share = len(long_prep_orders) / len(orders_df) * 100 if len(orders_df) else float('nan')
print(f"\nPercentage of orders with preparation time > 30 minutes: {long_prep_share:.2f}%")

# Average preparation time per restaurant, longest first.
restaurant_prep_time = (
    orders_df.groupby('restaurant_id')['preparation_time'].mean().sort_values(ascending=False)
)
top_restaurants = restaurant_prep_time.head(20)

fig, ax = plt.subplots(figsize=(12, 6))
# Pass `order` explicitly: without it seaborn sorts numeric category values,
# which would arrange the bars by restaurant id instead of by average time.
sns.barplot(
    x=top_restaurants.index,
    y=top_restaurants.values,
    order=list(top_restaurants.index),
    ax=ax,
)
ax.set_title('Average Preparation Time by Restaurant (Top 20)')
ax.set_xlabel('Restaurant ID')
ax.set_ylabel('Average Preparation Time (minutes)')
ax.tick_params(axis='x', labelrotation=45)
fig.savefig('avg_prep_time_by_restaurant.png')
plt.close(fig)

print("\nTop 5 restaurants with longest average preparation times:")
print(restaurant_prep_time.head())

Preparation Time Statistics (in minutes):
count    1802.000000
mean        9.429514
std        37.315912
min         0.000000
25%         1.800000
50%         4.508333
75%         9.275000
max      1135.583333
Name: preparation_time, dtype: float64
Percentage of orders with preparation time > 30 minutes: 3.27%
Top 5 restaurants with longest average preparation times:
restaurant_id
1346    1135.583333
195      328.377778
1396     242.450000
117      233.100000
1509      99.654545
Name: preparation_time, dtype: float64


# These results highlight that there is indeed inefficiency in the preparation process, which could be contributing to delays in delivery. Next, let's analyze the time couriers spend waiting at the restaurant, and then look at the problem from different perspectives.

In [3]:
#This code snippet calculates and visualizes the distribution of courier waiting times at restaurants, providing summary statistics and the percentage of orders with waits exceeding 10 minutes

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

 #Load and preprocess data
orders_df = pd.read_csv('orders.csv')
order_stages_df = pd.read_csv('order_stages.csv')

# Parse the timestamp columns and strip any timezone information so that the
# columns from both tables can be subtracted from each other.
orders_df['order_promised_delivery'] = pd.to_datetime(orders_df['order_promised_delivery']).dt.tz_localize(None)
orders_df['restaurant_finished_preparation'] = pd.to_datetime(orders_df['restaurant_finished_preparation']).dt.tz_localize(None)
order_stages_df['order_stage_start'] = pd.to_datetime(order_stages_df['order_stage_start']).dt.tz_localize(None)

# One row per (order, stage); later cells reuse `merged_df`.
merged_df = pd.merge(orders_df, order_stages_df, on='order_id')

# Courier waiting time at the restaurant, in minutes, per order:
# restaurant_finished_preparation - start of the 'courier_arrived_at_restaurant' stage.
# Positive values mean preparation finished after that stage started;
# negative values mean preparation had already finished.
# Computed vectorized on the first matching row per order instead of a
# per-group Python lambda. The result is indexed by order_id in sorted order.
arrival_rows = (
    merged_df.loc[merged_df['order_stage'] == 'courier_arrived_at_restaurant']
    .dropna(subset=['order_id'])
    .drop_duplicates(subset='order_id', keep='first')
    .set_index('order_id')
    .sort_index()
)
courier_wait_time = (
    arrival_rows['restaurant_finished_preparation'] - arrival_rows['order_stage_start']
).dt.total_seconds() / 60

# Distribution of courier waiting time.
fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(courier_wait_time, bins=50, kde=True, ax=ax)
ax.set_title('Distribution of Courier Waiting Time at Restaurant')
ax.set_xlabel('Waiting Time (minutes)')
ax.set_ylabel('Frequency')
fig.savefig('courier_waiting_time_distribution.png')
plt.close(fig)

# Summary statistics.
wait_time_stats = courier_wait_time.describe()
print("Courier Waiting Time Statistics (in minutes):")
print(wait_time_stats)

# Share of orders where the courier waits more than 10 minutes.
long_wait_orders = courier_wait_time[courier_wait_time > 10]
# Guard against an empty series so the cell reports NaN instead of raising
# ZeroDivisionError.
long_wait_share = (
    len(long_wait_orders) / len(courier_wait_time) * 100 if len(courier_wait_time) else float('nan')
)
print(f"\nPercentage of orders where courier waits > 10 minutes: {long_wait_share:.2f}%")

# Report where the figure was written. The previous line printed an
# un-interpolated "{image_urls_dict[...]}" placeholder for a name that is
# never defined in this notebook.
print("\nSaved figure:")
print('courier_waiting_time_distribution.png')

Courier Waiting Time Statistics (in minutes):
count    64145.000000
mean         0.751842
std          7.283964
min       -118.650000
25%         -2.766667
50%          0.766667
75%          4.233333
max         94.783333
dtype: float64
Percentage of orders where courier waits > 10 minutes: 7.07%
Image URL:
{image_urls_dict['courier_waiting_time_distribution.png']}


In [None]:
# Percentage of orders where courier waits > 10 minutes:
# 7.07% (taken from the courier waiting-time output above)

In [4]:
# Let's analyze the problem from the perspectives of the customer, the courier, and the restaurant owner. We start with the timing of each delivery stage relative to the promised delivery time and its impact on customer satisfaction.

# This cell computes, for every order stage, how many minutes before or after the promised delivery time the stage started, visualizes the result per stage, and prints the per-stage averages.

# Customer perspective: when does each delivery stage start relative to the
# time that was promised to the customer?
# Relies on `merged_df` built in the courier waiting-time cell.

# Minutes between each stage's start and the promised delivery time
# (order_stage_start - order_promised_delivery). Negative values mean the
# stage started before the promised time. The column keeps its original name,
# 'total_delivery_time', so code elsewhere that reads it continues to work,
# even though the value is an offset rather than a total duration.
merged_df['total_delivery_time'] = (
    merged_df['order_stage_start'] - merged_df['order_promised_delivery']
).dt.total_seconds() / 60

# Box plot of the offset per order stage. Title and axis label describe what
# is actually computed (an offset from the promise, not a total duration).
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=merged_df, x='order_stage', y='total_delivery_time', ax=ax)
ax.set_title('Stage Start Relative to Promised Delivery Time, by Order Stage')
ax.set_xlabel('Order Stage')
ax.set_ylabel('Minutes relative to promised delivery (negative = earlier)')
ax.tick_params(axis='x', labelrotation=45)
# bbox_inches='tight' keeps the rotated stage labels from being clipped.
fig.savefig('delivery_time_by_stage.png', bbox_inches='tight')
plt.close(fig)

# Average offset per stage, latest stage first.
avg_delivery_time_by_stage = (
    merged_df.groupby('order_stage')['total_delivery_time'].mean().sort_values(ascending=False)
)
print("\nAverage stage start relative to promised delivery (minutes) by order stage:")
print(avg_delivery_time_by_stage)

# Report where the figure was written. The previous line printed an
# un-interpolated "{image_urls_dict[...]}" placeholder for a name that is
# never defined in this notebook.
print("\nSaved figure:")
print('delivery_time_by_stage.png')

Average total delivery time by order stage:
order_stage
courier_delivered_order           0.635224
courier_arrived_at_customer      -1.553579
courier_picked_up_order         -13.061303
courier_arrived_at_restaurant   -17.789415
courier_accepts_order           -22.274378
order_proposed_to_courier       -22.467634
Name: total_delivery_time, dtype: float64
Image URL:
{image_urls_dict['delivery_time_by_stage.png']}


Next, let's analyze the problem from the perspectives of the courier and restaurant owner. We'll look at the courier's travel times and the restaurant's preparation efficiency.

In [8]:
#This code calculates and visualizes the estimated travel times of couriers to restaurants and customers, providing summary statistics and saving the distribution plot as an image.


# Courier perspective: estimated travel times.
# Relies on `order_stages_df` loaded in an earlier cell.

# Convert the estimated travel times from seconds to minutes.
order_stages_df['estimated_travel_time_to_restaurant'] = order_stages_df['estimated_travel_time_in_seconds_to_restaurant'] / 60
order_stages_df['estimated_travel_time_to_customer'] = order_stages_df['estimated_travel_time_in_seconds_to_eater'] / 60

# Overlay both distributions on one set of axes; missing estimates are dropped
# per column before plotting.
fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(
    order_stages_df['estimated_travel_time_to_restaurant'].dropna(),
    bins=50, kde=True, color='blue', label='To Restaurant', ax=ax,
)
sns.histplot(
    order_stages_df['estimated_travel_time_to_customer'].dropna(),
    bins=50, kde=True, color='green', label='To Customer', ax=ax,
)
ax.set_title('Distribution of Courier Travel Times')
ax.set_xlabel('Travel Time (minutes)')
ax.set_ylabel('Frequency')
ax.legend()
fig.savefig('courier_travel_times.png')
plt.close(fig)

# Summary statistics for both travel-time columns.
travel_time_stats = order_stages_df[
    ['estimated_travel_time_to_restaurant', 'estimated_travel_time_to_customer']
].describe()
print("Courier Travel Time Statistics (in minutes):")
print(travel_time_stats)

# Report where the figure was written. The previous line printed an
# un-interpolated "{image_urls_dict[...]}" placeholder for a name that is
# never defined in this notebook.
print("\nSaved figure:")
print('courier_travel_times.png')



Courier Travel Time Statistics (in minutes):
       estimated_travel_time_to_restaurant  estimated_travel_time_to_customer
count                         62055.000000                       64073.000000
mean                              3.019856                           9.332646
std                               2.047247                           5.715181
min                               0.000000                           0.000000
25%                               1.566667                           5.183333
50%                               2.733333                           8.050000
75%                               4.083333                          12.250000
max                              31.500000                          50.283333
Image URL:
{image_urls_dict['courier_travel_times.png']}
