In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from functools import reduce

Load the stored DataFrames.

In [2]:
# Restore the DataFrames that were persisted with IPython's `%store`
# (presumably by an upstream notebook — each name must have been stored before).
%store -r orders_df
%store -r campaign_data_df
%store -r order_process_data_df
%store -r interndata_study_df

%store -r preparation_df
%store -r readytoship_truck_df
%store -r standard_readytoship_truck_df
%store -r express_readytoship_truck_df
%store -r order_delivery_df
%store -r order_delivery_df_2
%store -r order_delivery_total_df

In [None]:
# Show each restored DataFrame's name followed by five random rows.
frames_to_preview = {
    'orders_df': orders_df,
    'campaign_data_df': campaign_data_df,
    'order_process_data_df': order_process_data_df,
    'interndata_study_df': interndata_study_df,
    'preparation_df': preparation_df,
    'readytoship_truck_df': readytoship_truck_df,
    'standard_readytoship_truck_df': standard_readytoship_truck_df,
    'express_readytoship_truck_df': express_readytoship_truck_df,
    'order_delivery_df': order_delivery_df,
    'order_delivery_df_2': order_delivery_df_2,
    'order_delivery_total_df': order_delivery_total_df,
}

for frame_name, frame in frames_to_preview.items():
    display(frame_name, frame.sample(5))

---

### Preparation Duration

In [None]:
# Peek at five random rows of the preparation data.
preparation_df.sample(n=5)

In [None]:
# (rows, columns) of the preparation data.
preparation_df.shape

Group the data by weekday and compute the minimum, maximum, and mean preparation duration for each weekday.

In [None]:
# Per-weekday summary of the preparation duration; the grouping is built once
# and reused for all three aggregates.
preparation_by_weekday = preparation_df.groupby('weekday')['preparation_duration']
min_preparation_by_weekday = preparation_by_weekday.min()
max_preparation_by_weekday = preparation_by_weekday.max()
average_preparation_by_weekday = preparation_by_weekday.mean()

# Overall average is taken over all orders. Averaging the weekday means instead
# would weight every weekday equally, regardless of how many orders it has.
overall_average_preparation = preparation_df['preparation_duration'].mean()

print(
    f'Minimum preparation duration for each {min_preparation_by_weekday}\n\n'
    f'Maximum preparation duration for each {max_preparation_by_weekday}\n\n'
    f'Average preparation duration for each {average_preparation_by_weekday}\n\n'
    f'Average preparation duration in total {overall_average_preparation}')

In [None]:
# dtype of the per-weekday averages (a Series, so a single dtype).
average_preparation_by_weekday.dtype

In [None]:
# Weekdays ordered from highest to lowest average preparation duration.
average_preparation_by_weekday.sort_values(ascending=False)

Plot the Average Preparation By Weekday.

In [None]:
# Calendar order for the x-axis (the groupby result is sorted alphabetically)
custom_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Bar chart of the average preparation duration per weekday.
# reindex (instead of .loc) keeps the plot working when a weekday has no
# orders: the missing day becomes NaN and is drawn as an empty slot rather
# than raising a KeyError.
plt.figure(figsize=(7, 4))
average_preparation_by_weekday.reindex(custom_order).plot(kind='bar', color='skyblue')
plt.title('Average Preparation Duration by Weekday')
plt.xlabel('')
plt.ylabel('Average Preparation Duration (Days)')
plt.xticks(rotation=0)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

---
---

### **KPI 1: Preparation Duration** (from order_date to ready_to_ship_date)

Boxplot

In [None]:
# Whitegrid theme for the KPI 1 box plot
sns.set_style("whitegrid")

# Calendar order for the x-axis categories
weekdays_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# One box per order weekday, shaded with the Blues palette; median drawn in red
fig, ax = plt.subplots(figsize=(7, 4))
sns.boxplot(
    data=preparation_df,
    x='weekday',
    y='preparation_duration',
    order=weekdays_order,
    palette='Blues',
    medianprops={'color': 'red'},
    ax=ax,
)
ax.set_title('Distribution of Preparation Duration by Order Day')
ax.set_xlabel('Order Day of Week')
ax.set_ylabel('Preparation Duration (Days)')
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

Violin Plot

In [None]:
# Whitegrid theme for the violin plot
sns.set_style("whitegrid")

# Calendar order for the x-axis categories
weekdays_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# One violin per order weekday, all in the same teal colour
fig, ax = plt.subplots(figsize=(7, 4))
sns.violinplot(
    data=preparation_df,
    x='weekday',
    y='preparation_duration',
    order=weekdays_order,
    color='#387771',
    ax=ax,
)
ax.set_title('Distribution of Preparation Duration by Order Day')
ax.set_xlabel('')
ax.set_ylabel('Preparation Duration (Days)')
plt.xticks(rotation=0)
fig.tight_layout()
plt.show()

In [None]:
# Target preparation time; the bins below allow one unit either side of it
norm_preparation_time = 2

# Bin edges and labels for classifying the preparation duration.
# pd.cut intervals are right-closed by default: (-inf, 1], (1, 3], (3, inf).
bins = [-np.inf, norm_preparation_time - 1, norm_preparation_time + 1, np.inf]
labels = ['Early', 'On Time', 'Late']

# Tag every order with its status category (adds a 'status' column in place)
status_by_order = pd.cut(preparation_df['preparation_duration'], bins=bins, labels=labels)
preparation_df['status'] = status_by_order

# Number of orders per status category
status_counts = preparation_df['status'].value_counts()

print(f'Number of orders by {status_counts}')

In [None]:
# Share of orders per status as a fraction of 1, rounded to two decimals
status_counts_percent = status_counts.div(status_counts.sum()).round(2)
print(f'Percentage of orders by {status_counts_percent}')

In [None]:
# Summary statistics of the preparation data.
preparation_df.describe()

---
---

## KPI 2: Ready-to-Ship to Truck Duration

In [None]:
# Peek at five random rows of the ready-to-ship/truck data.
readytoship_truck_df.sample(n=5)

In [None]:
# Mean ship-to-truck duration per ship mode
ship_to_truck_by_mode = readytoship_truck_df.groupby('ship_mode')['ship_to_truck']
average_duration_by_ship_mode = ship_to_truck_by_mode.mean()

# Bar chart of the average duration by ship mode
fig, ax = plt.subplots(figsize=(7, 4))
average_duration_by_ship_mode.plot.bar(ax=ax, color='#387771')
ax.set_title('Average Truckloading Duration')
ax.set_xlabel('')
ax.set_ylabel('Average Shipment Duration (Days)')
plt.xticks(rotation=0)
fig.tight_layout()
plt.show()

In [None]:
# Calendar order for the x-axis categories
weekdays_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Mean ship-to-truck duration for every (weekday, ship mode) pair, flattened
# back into columns so seaborn can consume it
ship_to_truck_groups = readytoship_truck_df.groupby(['weekday', 'ship_mode'])['ship_to_truck']
average_duration_by_weekday_and_ship_mode = ship_to_truck_groups.mean().reset_index()

# Grouped bars: one cluster per weekday, one bar per ship mode
fig, ax = plt.subplots(figsize=(8, 4))
sns.barplot(
    data=average_duration_by_weekday_and_ship_mode,
    x='weekday',
    y='ship_to_truck',
    hue='ship_mode',
    order=weekdays_order,
    palette='Blues',
    ax=ax,
)
ax.set_title('Average Shipment Duration by Ship Mode for Each Weekday')
ax.set_xlabel('Weekday')
ax.set_ylabel('Average Shipment Duration (Days)')
# Legend is anchored outside the axes, to the right of the plot
ax.legend(title='Ship Mode', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.xticks(rotation=0)
fig.tight_layout()
plt.show()

In [18]:
# # Filter the DataFrame for Tuesdays and Thursdays
# tuesday_thursday_orders = readytoship_truck_df[
#                             (readytoship_truck_df['weekday'] == 'Tuesday') |
#                             (readytoship_truck_df['weekday'] == 'Thursday')
#                             ]

# # Print the filtered DataFrame
# tuesday_thursday_orders.head(20)

In [None]:
# Five random rows of the ready-to-ship/truck data.
readytoship_truck_df.sample(n=5)

In [None]:
# Calendar order for the x-axis categories
weekdays_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Whitegrid theme
sns.set_style("whitegrid")

# Side-by-side boxes per weekday, one per shipping mode
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(
    data=readytoship_truck_df,
    x='weekday',
    y='ship_to_truck',
    hue='ship_mode',
    order=weekdays_order,
    palette='Blues',
    dodge=True,
    width=0.6,
    ax=ax,
)
ax.set_title('Distribution of Shipment Duration by Weekday and Shipping Mode')
ax.set_xlabel('Weekday')
ax.set_ylabel('Shipment Duration (Days)')
ax.legend(title='Shipping Mode')
# plt.xticks(rotation=45)
fig.tight_layout()
plt.show()


---
---

## KPI 3: Order Delivery Time

In [None]:
# Peek at five random rows of the order delivery data.
order_delivery_df.sample(n=5)

In [None]:
# Mean delivery time per ship mode
delivery_time_by_ship_mode = order_delivery_df.groupby('ship_mode')['delivery_time']
average_delivery_time = delivery_time_by_ship_mode.mean()

average_delivery_time

In [None]:
# Bar chart of the average delivery time per ship mode
plt.figure(figsize=(7, 4))
average_delivery_time.plot(kind='bar', color='#387771')
plt.title('Average Delivery Time by Ship Mode')
plt.xlabel('Ship Mode')
plt.ylabel('Average Delivery Time (days)')
# Labels are unrotated, so keep the default centred alignment; ha='right'
# would push every label to the left of its bar.
plt.xticks(rotation=0)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.tight_layout()
plt.show()

In [None]:
# Peek at five random rows of the second order delivery table.
order_delivery_df_2.sample(n=5)

Calculate the average delivery time.

In [None]:
# Mean of the 'delivery_time_2' column across all rows
delivery_times_2 = order_delivery_df_2['delivery_time_2']
average_delivery_time_2 = delivery_times_2.mean()

average_delivery_time_2

## KPI 4: Total Order Delivery Time

In [None]:
# Peek at five random rows of the total delivery data.
order_delivery_total_df.sample(n=5)

In [None]:
# Derive the weekday name of each order date and store it as a new column
order_weekday_names = order_delivery_total_df['order_date'].dt.day_name()
order_delivery_total_df['order_weekday'] = order_weekday_names

order_delivery_total_df.sample(n=5)

In [None]:
# Column dtypes of the total delivery data.
order_delivery_total_df.dtypes

Group by weekday and calculate average delivery time.

In [None]:
# Mean total delivery time per order weekday, shown with one decimal
total_delivery_by_weekday = order_delivery_total_df.groupby('order_weekday')['total_delivery_time']
average_delivery_time_weekday = total_delivery_by_weekday.mean()

average_delivery_time_weekday.round(decimals=1)

### Boxplot

In [None]:
# Calendar order for the x-axis categories
weekdays_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Elapsed time from order date to arrival scan, expressed in hours
order_to_arrival = order_delivery_total_df['arrival_scan_date'] - order_delivery_total_df['order_date']
order_delivery_total_df['order_time_hours'] = order_to_arrival.dt.total_seconds() / 3600

# One box per order weekday
fig, ax = plt.subplots(figsize=(7, 4))
sns.boxplot(
    data=order_delivery_total_df,
    x='order_weekday',
    y='order_time_hours',
    order=weekdays_order,
    palette='Blues',
    ax=ax,
)
ax.set_title('Order Time in Hours by Weekday')
ax.set_xlabel('Weekday')
ax.set_ylabel('Order Time (hours)')
# plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

### Boxplot with Median

In [None]:
# Same elapsed time as 'order_time_hours', expressed in days
order_time_hours = order_delivery_total_df['order_time_hours']
order_delivery_total_df['order_time_days'] = order_time_hours / 24

# One box per order weekday, median highlighted in red
# (weekdays_order comes from an earlier cell)
fig, ax = plt.subplots(figsize=(7, 4))
sns.boxplot(
    data=order_delivery_total_df,
    x='order_weekday',
    y='order_time_days',
    order=weekdays_order,
    palette='Blues',
    medianprops={'color': 'red'},
    ax=ax,
)
ax.set_title('Order Time in Days by Weekday')
ax.set_xlabel('Weekday')
ax.set_ylabel('Order Time (days)')
# plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

### Line Plot

In [None]:
# Calendar order for the weekday categories
weekdays_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Make 'order_weekday' an ordered categorical so it sorts in calendar order
weekday_dtype = pd.CategoricalDtype(categories=weekdays_order, ordered=True)
order_delivery_total_df['order_weekday'] = order_delivery_total_df['order_weekday'].astype(weekday_dtype)

# Line plot of order time by weekday
fig, ax = plt.subplots(figsize=(7, 4))
sns.lineplot(
    data=order_delivery_total_df,
    x='order_weekday',
    y='order_time_days',
    marker='o',
    color='blue',
    ax=ax,
)
ax.set_title('Order Time in Days by Weekday')
ax.set_xlabel('Weekday')
ax.set_ylabel('Order Time (days)')
#plt.xticks(rotation=45)
ax.grid(True)
fig.tight_layout()
plt.show()

### Scatter Plot

In [None]:
# One marker per order: weekday on x, order time in days on y
fig, ax = plt.subplots(figsize=(7, 4))
sns.scatterplot(
    data=order_delivery_total_df,
    x='order_weekday',
    y='order_time_days',
    #color='blue',
    s=80,
    ax=ax,
)
ax.set_title('Order Time in Days by Weekday')
ax.set_xlabel('Weekday')
ax.set_ylabel('Order Time (days)')
#plt.xticks(rotation=45)
ax.grid(True)
fig.tight_layout()
plt.show()

### Area Plot

In [None]:
# Area plot: mean order time per weekday with the region under it shaded.
# The fill must use the per-weekday mean, not the raw per-order rows: raw rows
# are unsorted and repeat each weekday many times, which produces a zig-zag
# polygon unrelated to the plotted line.
# (weekdays_order comes from an earlier cell.)
mean_order_time_by_weekday = (
    order_delivery_total_df
    .groupby('order_weekday', observed=False)['order_time_days']
    .mean()
    .reindex(weekdays_order)
    .dropna()  # weekdays without any orders have nothing to fill
)

plt.figure(figsize=(7, 4))
sns.lineplot(x='order_weekday',
             y='order_time_days',
             data=order_delivery_total_df,
             marker='o',
             color='blue')
plt.fill_between([str(day) for day in mean_order_time_by_weekday.index],
                 mean_order_time_by_weekday.to_numpy(),
                 color='skyblue',
                 alpha=0.3)
plt.title('Order Time in Days by Weekday')
plt.xlabel('Weekday')
plt.ylabel('Order Time (days)')
#plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()

### Scatter Plot with Average line

In [None]:
# Mean of 'total_delivery_time' over all orders
total_delivery_times = order_delivery_total_df['total_delivery_time']
average_delivery_time_days = total_delivery_times.mean()

# One point per order (x is the DataFrame index) plus a horizontal line at the mean
fig, ax = plt.subplots(figsize=(7, 4))
ax.scatter(order_delivery_total_df.index,
           total_delivery_times,
           color='blue',
           alpha=0.5)
ax.axhline(y=average_delivery_time_days,
           color='red',
           linestyle='-',
           label=f'Average Delivery Time: {average_delivery_time_days:.2f} days')
ax.set_title('Delivery Time for Each Order')
ax.set_xlabel('Order Index')
ax.set_ylabel('Delivery Time (days)')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

### Bar Plot with 95th Percentile

In [None]:
# Calendar order of weekdays for the x-axis
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Mean and 95th percentile of the order time for each weekday
order_time_by_weekday = order_delivery_total_df.groupby('order_weekday',
                                                        observed=False
                                                        )['order_time_days']
average_order_time = order_time_by_weekday.mean().reindex(weekday_order)
percentile_95 = order_time_by_weekday.quantile(0.95).reindex(weekday_order)

# The whisker marks the 95th percentile of the data. That is not a confidence
# interval of the mean, so it is labelled as a percentile and drawn upward
# only (lower extent 0); a symmetric bar would suggest a lower bound that was
# never computed. clip guards the rare case of the mean exceeding the percentile.
upper_whisker = (percentile_95 - average_order_time).clip(lower=0).to_numpy()
lower_whisker = np.zeros(len(upper_whisker))

# Plotting barplot
plt.figure(figsize=(7, 4))
sns.barplot(x=average_order_time.index,
            y=average_order_time.values,
            color='skyblue',
            label='Average Order Time')
plt.errorbar(x=average_order_time.index,
             y=average_order_time.values,
             yerr=[lower_whisker, upper_whisker],  # shape (2, N): lower, upper
             fmt='none',
             ecolor='black',
             capsize=5, label='95th Percentile')
plt.title('Average Delivery Time and 95th Percentile by Weekday')
plt.xlabel('')
plt.ylabel('Delivery Time (days)')
plt.xticks(rotation=0)
plt.legend()
plt.tight_layout()
plt.ylim(0, 18)
plt.show()

In [None]:
# Calendar order of weekdays for the x-axis (redefined so the cell is self-contained)
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Mean and 95th percentile of the order time for each weekday
order_time_by_weekday = order_delivery_total_df.groupby('order_weekday',
                                                        observed=False
                                                        )['order_time_days']
average_order_time = order_time_by_weekday.mean().reindex(weekday_order)
percentile_95 = order_time_by_weekday.quantile(0.95).reindex(weekday_order)

# The whisker marks the 95th percentile of the data, not a confidence interval
# of the mean, so it is labelled as a percentile and drawn upward only.
# clip guards the rare case of the mean exceeding the percentile.
upper_whisker = (percentile_95 - average_order_time).clip(lower=0).to_numpy()
lower_whisker = np.zeros(len(upper_whisker))

# Wider figure with the legend placed outside the axes.
# A single bar colour is passed via `color`; a one-element `palette` without
# `hue` only cycles that colour and triggers seaborn warnings.
plt.figure(figsize=(10, 4))
sns.barplot(x=average_order_time.index,
            y=average_order_time.values,
            color='#387771',
            label='Average Order Time')
plt.errorbar(x=average_order_time.index,
             y=average_order_time.values,
             yerr=[lower_whisker, upper_whisker],  # shape (2, N): lower, upper
             fmt='none', ecolor='black',
             capsize=5,
             label='95th Percentile')
plt.title('Average Delivery Time and 95th Percentile by Weekday')
plt.xlabel('')
plt.ylabel('Delivery Time (days)')
plt.xticks(rotation=0)
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
plt.tight_layout()
plt.ylim(0, 18)
plt.show()

Average total delivery time (in days) for each ship mode.

In [None]:
# Join the total delivery data with the order process data on order_id
# (outer join keeps orders that appear in only one of the two tables) ...
merged_deliveries = pd.merge(order_delivery_total_df,
                             order_process_data_df,
                             how='outer',
                             on='order_id')

# ... then average the total delivery time per ship mode (one-column DataFrame)
total_deliveries_shipmode_df = merged_deliveries.groupby('ship_mode')[['total_delivery_time']].mean()  # .round(2)

total_deliveries_shipmode_df

In [None]:
# Index, column dtypes and non-null counts of the per-ship-mode averages.
total_deliveries_shipmode_df.info()

In [None]:
# Bar chart: one bar per ship mode (the DataFrame index), height = mean total delivery time
fig, ax = plt.subplots(figsize=(7, 4))
ax.bar(total_deliveries_shipmode_df.index,
       total_deliveries_shipmode_df['total_delivery_time'],
       color='#387771')
ax.set_title('Average Delivery Time')
ax.set_xlabel('')
ax.set_ylabel('Total Delivery Time')
fig.tight_layout()
plt.show()

---
---

Save the DataFrames as CSV files so that they can be used in other programmes.

In [None]:
# Every DataFrame to export, keyed by the file stem it is written to
dataframes = {
    'orders_df': orders_df,
    'campaign_data_df': campaign_data_df,
    'order_process_data_df': order_process_data_df,
    'interndata_study_df': interndata_study_df,
    'preparation_df': preparation_df,
    'readytoship_truck_df': readytoship_truck_df,
    'standard_readytoship_truck_df': standard_readytoship_truck_df,
    'express_readytoship_truck_df': express_readytoship_truck_df,
    'order_delivery_df': order_delivery_df,
    'order_delivery_df_2': order_delivery_df_2,
    'order_delivery_total_df': order_delivery_total_df,
    # This groupby result keeps ship_mode in its index. Move it into a regular
    # column first, otherwise index=False below would write only the averages
    # and the CSV would not say which ship mode each row belongs to.
    'total_deliveries_shipmode_df': total_deliveries_shipmode_df.reset_index()
    }

# Write each DataFrame to '<name>.csv' in the current working directory,
# without the row index
for name, dataframe in dataframes.items():
    filename = f'{name}.csv'
    dataframe.to_csv(filename, index=False)
    print(f'{filename} saved.')