In [3]:
import pandas as pd

DATA_DIR = r"C:\Users\Ryzen\Desktop\Business Analytics Project Portfolio\Order-to-Delivery process"   # example for your attached files
OUT_DIR  = r"C:\Users\Ryzen\Desktop\Business Analytics Project Portfolio\Order-to-Delivery process"   # where to save outputs

## Loading the CSVs

In [5]:
orders = pd.read_csv(DATA_DIR + "/olist_orders_dataset.csv")
items  = pd.read_csv(DATA_DIR + "/olist_order_items_dataset.csv")
cust   = pd.read_csv(DATA_DIR + "/olist_customers_dataset.csv")
sell   = pd.read_csv(DATA_DIR + "/olist_sellers_dataset.csv")
payments = pd.read_csv(DATA_DIR + "/olist_order_payments_dataset.csv")

## Converting timestamp columns

In [6]:
orders["order_purchase_timestamp"] = pd.to_datetime(orders["order_purchase_timestamp"], errors="coerce")
orders["order_approved_at"] = pd.to_datetime(orders["order_approved_at"], errors="coerce")
orders["order_delivered_carrier_date"] = pd.to_datetime(orders["order_delivered_carrier_date"], errors="coerce")
orders["order_delivered_customer_date"] = pd.to_datetime(orders["order_delivered_customer_date"], errors="coerce")
orders["order_estimated_delivery_date"] = pd.to_datetime(orders["order_estimated_delivery_date"], errors="coerce")
items["shipping_limit_date"] = pd.to_datetime(items["shipping_limit_date"], errors="coerce")

## Creating a simple order-level table

In [7]:
order_items_summary = items.groupby("order_id").agg(
    num_items=("order_item_id", "count"),
    basket_value=("price", "sum"),
    freight_value=("freight_value", "sum")
).reset_index()

orders_enriched = orders.merge(order_items_summary, on="order_id", how="left")

# Add customer location
orders_enriched = orders_enriched.merge(
    cust[["customer_id", "customer_city", "customer_state"]],
    on="customer_id",
    how="left"
)

orders_enriched.to_csv(OUT_DIR + "/orders_enriched_simple.csv", index=False)
print("Saved: orders_enriched_simple.csv")

Saved: orders_enriched_simple.csv


## Creating the event log (the main process mining dataset)

In [8]:
events = []

# Order Placed
tmp = orders_enriched[["order_id", "order_purchase_timestamp"]].copy()
tmp.columns = ["case_id", "event_time"]
tmp["activity"] = "Order Placed"
events.append(tmp)

# Order Approved
tmp = orders_enriched[["order_id", "order_approved_at"]].copy()
tmp.columns = ["case_id", "event_time"]
tmp["activity"] = "Order Approved"
events.append(tmp)

# Handed to Carrier
tmp = orders_enriched[["order_id", "order_delivered_carrier_date"]].copy()
tmp.columns = ["case_id", "event_time"]
tmp["activity"] = "Handed to Carrier"
events.append(tmp)

# Delivered to Customer
tmp = orders_enriched[["order_id", "order_delivered_customer_date"]].copy()
tmp.columns = ["case_id", "event_time"]
tmp["activity"] = "Delivered to Customer"
events.append(tmp)

# ing all events into one table
event_log = pd.concat(events, ignore_index=True)

# Removing missing timestamps
event_log = event_log.dropna(subset=["event_time"])

# Sorting events inside each order
event_log = event_log.sort_values(["case_id", "event_time"])

event_log.to_csv(OUT_DIR + "/event_log_order_fulfilment_simple.csv", index=False)
print("Saved: event_log_order_fulfilment_simple.csv")

Saved: event_log_order_fulfilment_simple.csv


## Simple KPI calculation: end-to-end fulfilment time

In [9]:
kpi = orders_enriched[[
    "order_id",
    "order_purchase_timestamp",
    "order_delivered_customer_date",
    "order_estimated_delivery_date"
]].copy()

kpi["end_to_end_days"] = (kpi["order_delivered_customer_date"] - kpi["order_purchase_timestamp"]).dt.days
kpi["sla_breached"] = kpi["order_delivered_customer_date"] > kpi["order_estimated_delivery_date"]

kpi.to_csv(OUT_DIR + "/kpi_simple.csv", index=False)
print("Saved: kpi_simple.csv")

print("Median end-to-end days:", kpi["end_to_end_days"].median())
print("SLA breach rate:", kpi["sla_breached"].mean())

Saved: kpi_simple.csv
Median end-to-end days: 10.0
SLA breach rate: 0.07870998883760219


## Delayed orders Calculation

In [12]:
import pandas as pd
import numpy as np