Step 1: Simulate Dataset (Optional if you already have a CSV)

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Seed NumPy's global RNG so the simulated orders are identical on every run.
np.random.seed(42)

# Dataset size and the value pools each order samples from.
n = 1000
categories = ['Electronics', 'Clothing', 'Books', 'Home', 'Beauty']
vendors = ['VendorA', 'VendorB', 'VendorC']
regions = ['North', 'South', 'East', 'West']
returns = ['Returned', 'Not Returned']
reasons = ['Damaged', 'Wrong item', 'Late', 'No reason']

data = []

# NOTE: with a fixed seed the generated data depends on the exact order of the
# np.random calls below, so keep the draw sequence stable when editing.
for i in range(n):
    # Order placed on a random day of 2024 (days 1-28 are valid in every month).
    month = np.random.randint(1, 13)
    day = np.random.randint(1, 29)
    order_date = datetime(2024, month, day)

    # Shipped 1-3 days after ordering, delivered 2-10 days after shipping.
    shipping_date = order_date + timedelta(days=np.random.randint(1, 4))
    transit_days = np.random.randint(2, 11)
    delivery_date = shipping_date + timedelta(days=transit_days)

    # More than 5 days between shipping and delivery counts as delayed.
    if transit_days > 5:
        delivery_status = 'Delayed'
    else:
        delivery_status = 'Delivered'

    # 20% of orders are returned; only returned orders get a reason.
    returned = np.random.choice(returns, p=[0.2, 0.8])
    if returned == 'Returned':
        return_reason = np.random.choice(reasons)
    else:
        return_reason = ''

    record = {
        'Order ID': f'ORD{i:04}',
        'Product Category': np.random.choice(categories),
        'Vendor': np.random.choice(vendors),
        'Order Date': order_date,
        'Shipping Date': shipping_date,
        'Delivery Date': delivery_date,
        'Region': np.random.choice(regions),
        'Delivery Status': delivery_status,
        'Return Status': returned,
        'Return Reason': return_reason,
    }
    data.append(record)

# Materialize the records and persist them for the loading step.
df = pd.DataFrame(data)
df.to_csv('ecommerce_orders.csv', index=False)



Step 2: Load the Data

In [2]:
# Read the orders back from disk, parsing the three date columns as datetimes
# so the date arithmetic in later cells works.
date_columns = ['Order Date', 'Shipping Date', 'Delivery Date']
df = pd.read_csv('ecommerce_orders.csv', parse_dates=date_columns)


Step 3: Add Delay Columns

In [3]:
# Whole days elapsed between shipping and delivery for each order.
transit_time = df['Delivery Date'] - df['Shipping Date']
df['Delay Days'] = transit_time.dt.days
# Flag orders that took more than 5 days to arrive after shipping.
df['Is Delayed'] = df['Delay Days'].gt(5)



Step 4: Return Rate by Category

In [4]:
import plotly.express as px

# Per-category order volume, number of returned orders, and return rate.
cat_summary = df.groupby('Product Category').agg(
    Total_Orders=('Order ID', 'count'),
    Returns=('Return Status', lambda status: status.eq('Returned').sum())
)
cat_summary['Return Rate (%)'] = cat_summary['Returns'] / cat_summary['Total_Orders'] * 100

# Plot return rates by category
fig1 = px.bar(
    cat_summary.reset_index(),
    x='Product Category',
    y='Return Rate (%)',
    title='Return Rate by Product Category',
    color='Return Rate (%)',
    text='Return Rate (%)'
)
# The text labels come straight from a float column; without a template they
# render as long unrounded decimals, so format them to one decimal place.
fig1.update_traces(texttemplate='%{text:.1f}%')
fig1.show()



Step 5: Return Rate by Vendor

In [5]:
# Per-vendor order volume, number of returned orders, and return rate.
vendor_summary = df.groupby('Vendor').agg(
    Total_Orders=('Order ID', 'count'),
    Returns=('Return Status', lambda status: status.eq('Returned').sum())
)
vendor_summary['Return Rate (%)'] = vendor_summary['Returns'] / vendor_summary['Total_Orders'] * 100

# Plot return rates by vendor
fig2 = px.bar(
    vendor_summary.reset_index(),
    x='Vendor',
    y='Return Rate (%)',
    title='Return Rate by Vendor',
    color='Return Rate (%)',
    text='Return Rate (%)'
)
# The text labels come straight from a float column; without a template they
# render as long unrounded decimals, so format them to one decimal place.
fig2.update_traces(texttemplate='%{text:.1f}%')
fig2.show()



Step 6: Average Delivery Time by Region



In [6]:
# Per-region mean of 'Delay Days' and count of returned orders.
region_summary = df.groupby('Region').agg(
    Avg_Delay=('Delay Days', 'mean'),
    Total_Returns=('Return Status', lambda status: status.eq('Returned').sum())
)

# Avg Delay Days by Region
fig3 = px.bar(
    region_summary.reset_index(),
    x='Region',
    y='Avg_Delay',
    title='Average Delivery Delay by Region',
    color='Avg_Delay',
    text='Avg_Delay'
)
# The mean is a float; without a template the labels render as long unrounded
# decimals, so format them to two decimal places.
fig3.update_traces(texttemplate='%{text:.2f}')
fig3.show()


Step 7: Delay Distribution by Product Category

In [7]:
# Box plot of 'Delay Days' per order, one colored box per product category.
fig4 = px.box(
    data_frame=df,
    x='Product Category',
    y='Delay Days',
    color='Product Category',
    title='Delivery Delay Days by Product Category',
)
fig4.show()


Step 8: Correlation Between Delay and Returns

In [8]:
# Compare return rates between delayed ('Is Delayed' == True) and on-time orders.
delay_vs_return = df.groupby('Is Delayed').agg(
    Total=('Order ID', 'count'),
    Returns=('Return Status', lambda status: status.eq('Returned').sum())
)
delay_vs_return['Return Rate (%)'] = delay_vs_return['Returns'] / delay_vs_return['Total'] * 100

# Plot return rate for delayed vs. non-delayed orders
fig5 = px.bar(
    delay_vs_return.reset_index(),
    x='Is Delayed',
    y='Return Rate (%)',
    title='Return Rate vs Delayed Orders',
    color='Return Rate (%)',
    text='Return Rate (%)'
)
# The text labels come straight from a float column; without a template they
# render as long unrounded decimals, so format them to one decimal place.
fig5.update_traces(texttemplate='%{text:.1f}%')
fig5.show()


Summary of Insights:

Vendors with highest return rates and delays

Product categories prone to high return rates

Regions with longest average shipping times

Correlation between shipping delays and returns



Step 9: Export All Plots to a Single HTML File

In [9]:
import html

import plotly.io as pio

# Figures to export, in the order they should appear on the page.
figures = [fig1, fig2, fig3, fig4, fig5]

# The report title doubles as the output file name (plus the .html extension).
report_title = "E-commerce Order Delay & Returns Analysis"
output_path = f"{report_title}.html"

# Render each figure as an HTML fragment. Only the first fragment pulls in
# plotly.js from the CDN; the rest reuse the already-loaded library instead of
# emitting a duplicate <script> tag per figure.
html_parts = [
    pio.to_html(
        fig,
        full_html=False,
        include_plotlyjs='cdn' if position == 0 else False,
    )
    for position, fig in enumerate(figures)
]

# Assemble one complete document. The title is escaped because it contains
# an ampersand, which must be written as an entity in HTML.
full_html = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>{html.escape(report_title)}</title>
</head>
<body>
    {''.join(html_parts)}
</body>
</html>
"""

# Save to file
with open(output_path, "w", encoding="utf-8") as f:
    f.write(full_html)

print(f"Saved to {output_path}")

Saved to E-commerce Order Delay & Returns Analysis.html
