In [20]:
# Import the necessary libraries
import pandas as pd
import numpy as np

In [21]:
# Read the 'CartwheelProyect.xlsx' workbook into the 'cartwheel' DataFrame.
cartwheel = pd.read_excel(io='CartwheelProyect.xlsx')

In [22]:
# Parse the timestamp columns into datetime values.
# errors='coerce' turns any entry that cannot be parsed into NaT instead of
# raising, so malformed dates surface as missing values.
date_columns = [
    'Order Created',
    'Actual Drop off Time',
    'Actual Pick up Time',
    'Finish Date',
]
for date_col in date_columns:
    cartwheel[date_col] = cartwheel[date_col].pipe(pd.to_datetime, errors='coerce')

In [23]:
# Keep only the rows in which all four timestamp columns are present
# (a row is removed if any one of them is missing/NaT).
# The filtered frame is assigned back instead of mutating with inplace=True,
# matching the reassignment style used later in this notebook and keeping the
# cell safe to re-run.
cartwheel = cartwheel.dropna(
    subset=[
        'Order Created',
        'Finish Date',
        'Actual Drop off Time',
        'Actual Pick up Time',
    ]
)

In [24]:
# Delivery time in hours: elapsed time from order creation to actual drop-off.
# NOTE: a later cell assigns 'Delivery Time' again (computed from 'Finish Date'),
# so the values stored here are overwritten.
cartwheel['Delivery Time'] = (
    cartwheel['Actual Drop off Time']
    .sub(cartwheel['Order Created'])
    .dt.total_seconds()
    .div(3600)
)

In [25]:
# Transit time in hours: elapsed time from actual pick-up to actual drop-off.
cartwheel['Pick up to Drop off Time'] = (
    cartwheel['Actual Drop off Time']
    .sub(cartwheel['Actual Pick up Time'])
    .dt.total_seconds()
    .div(3600)
)


In [26]:
# NOTE: The 'Subtotal' column has too many missing values, so it is excluded from the analysis to preserve the integrity of the results.

In [46]:
# Parse 'Tip' as numeric (entries that cannot be parsed become NaN), then
# replace missing tips with 0, assuming no tip was given for those orders.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on the selected column: that chained in-place form is
# deprecated in pandas 2.x and leaves the DataFrame unchanged under
# Copy-on-Write, so the NaNs would silently remain.
cartwheel['Tip'] = pd.to_numeric(cartwheel['Tip'], errors='coerce').fillna(0)

In [28]:
# NOTE: The 'Total Revenue' calculation is skipped because 'Subtotal' has too many missing values to be usable.

In [29]:
# Total delivery time in hours: elapsed time from order creation to 'Finish Date'.
# This (re)defines 'Delivery Time'; it is stored as float hours rather than a raw
# Timedelta so it uses the same unit as 'Pick up to Drop off Time' and is
# directly usable as a numeric measure in the exported spreadsheets.
cartwheel['Delivery Time'] = (
    cartwheel['Finish Date'] - cartwheel['Order Created']
).dt.total_seconds() / 3600

In [30]:
# Remove orders that have no 'Driver' recorded; the per-driver analysis below
# cannot attribute them to anyone.
# Assigned back instead of using inplace=True, matching the reassignment style
# used later in this notebook.
cartwheel = cartwheel.dropna(subset=['Driver'])

In [49]:
# Driver efficiency: mean 'Delivery Time' for each driver (indexed by 'Driver').
driver_efficiency = cartwheel['Delivery Time'].groupby(cartwheel['Driver']).mean()

In [50]:
# Tip trends: record the hour in which each order was created, then compute
# the mean tip for every hour.
cartwheel['Hour of Day'] = cartwheel['Order Created'].dt.hour
tip_trends = cartwheel['Tip'].groupby(cartwheel['Hour of Day']).mean()

In [33]:
# NOTE: 'Restaurant Delay' column has too many missing values, so I will not include it in my analysis.

In [51]:
# Temporal trends: count of non-null 'Order Number' values per calendar month
# of 'Order Created'.
order_trends = (
    cartwheel['Order Number']
    .groupby(cartwheel['Order Created'].dt.to_period("M"))
    .count()
)

In [52]:
# Keep only rows whose 'Hour of Day' is present and lies in the valid 0-23 range
# (assuming 'Hour of Day' is an integer representing the hour).
cartwheel = (
    cartwheel
    .dropna(subset=['Hour of Day'])
    .loc[lambda frame: frame['Hour of Day'].between(0, 23)]
)

# Write the row-level 'Hour of Day' / 'Tip' pairs to Excel (no index column).
cartwheel.loc[:, ['Hour of Day', 'Tip']].to_excel(excel_writer='hourly_tips.xlsx', index=False)

In [37]:
# Columns that feed the dashboard, in the order they appear in the export.
columns_for_dashboard = [
    'Order Number',
    'Order Status',
    'Order Created',
    'Finish Date',
    'Actual Pick up Time',
    'Actual Drop off Time',
    'Driver',
    'Tip',
    'Courier Charge',
    'Delivery Time',
    'Pick up to Drop off Time',
    'Cancelled or Returned',
    'Hour of Day',
]

# Restrict the cleaned data to those columns.
dashboard_data = cartwheel.loc[:, columns_for_dashboard]

# Write the dashboard dataset to Excel without the index column.
dashboard_data.to_excel(excel_writer='DashboardData.xlsx', index=False)

In [54]:
# Number of orders per day: count of non-null 'Order Number' values for each
# calendar date of 'Order Created'.
orders_per_day = (
    cartwheel.groupby(cartwheel['Order Created'].dt.date)['Order Number']
    .count()
    .reset_index()
)
orders_per_day.columns = ['Date', 'Number of Orders']
# Each workbook is written exactly once (the previous duplicate export calls
# rewrote the same file with identical content).
orders_per_day.to_excel('orders_per_day.xlsx', index=False)

# Number of orders per hour: count of non-null 'Order Number' values for each
# 'Hour of Day'.
orders_per_hour = (
    cartwheel.groupby(cartwheel['Hour of Day'])['Order Number']
    .count()
    .reset_index()
)
orders_per_hour.columns = ['Hour of Day', 'Number of Orders']
orders_per_hour.to_excel('orders_per_hour.xlsx', index=False)

In [45]:
# Row-level 'Driver' / 'Delivery Time' pairs for Tableau.
# The data is exported as-is (no averaging), so aggregation happens in Tableau.
cartwheel.loc[:, ['Driver', 'Delivery Time']].to_excel(
    excel_writer='cartwheel_driver_data_driver_time.xlsx',
    index=False,
)


In [40]:
# Cancellations and returns broken down by 'Return Reason'.
# NOTE(review): an earlier cell drops every row that lacks a pick-up, drop-off,
# or finish timestamp; if cancelled/returned orders typically lack those
# timestamps they are already excluded here - confirm against the raw data.
cancellations_returns = cartwheel.loc[
    cartwheel['Order Status'].isin(['Cancelled', 'Returned'])
]
# Count non-null 'Order Number' values per reason and flatten to two columns.
cancellations_returns_by_reason = (
    cancellations_returns
    .groupby('Return Reason')['Order Number']
    .count()
    .reset_index(name='Number of Orders')
)
cancellations_returns_by_reason.to_excel(
    excel_writer='cancellations_returns_by_reason.xlsx',
    index=False,
)

In [42]:
# Row-level 'Hour of Day' / 'Tip' pairs for Tableau's average-tip-by-hour view.
# The data is exported as-is (no averaging), so the mean is computed in Tableau.
cartwheel.loc[:, ['Hour of Day', 'Tip']].to_excel(
    excel_writer='cartwheel_data_tip_hour.xlsx',
    index=False,
)