In [41]:
# Processing libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

# Importing Data

In [42]:
# Load the three cleaned input tables; the unpacking order matches the path order below.
orders, customer, products = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../Cleaned_Datasets/orders_generated.csv",  # order data file
        "../Cleaned_Datasets/customer_SG_only.csv",  # main user data file
        "../Cleaned_Datasets/products_cleaned.csv",  # product data file
    )
)


In [43]:
# Preview the loaded orders table.
# NOTE(review): the rendered output shows two extra index-like columns
# ("Unnamed: 0.1" and "Unnamed: 0") next to the order fields -- presumably left
# over from earlier CSV exports written with their index; confirm and drop upstream.
orders

Unnamed: 0.1,Unnamed: 0,order_id,order_time,product_id,customer_id
0,0,order_1,2019-11-11 13:50:53.819019,17393,299
1,1,order_2,2019-11-27 05:41:28.793190,7725,299
2,2,order_3,2019-10-19 10:00:48.691390,10948,299
3,3,order_4,2019-10-09 18:24:32.264380,19083,299
4,4,order_5,2019-10-29 04:41:47.170388,7954,299
...,...,...,...,...,...
65888,65888,order_65889,2019-03-18 20:34:28.868262,10328,127744
65889,65889,order_65890,2019-03-15 09:41:12.357500,1794,127744
65890,65890,order_65891,2019-05-05 03:52:57.607549,8767,127744
65891,65891,order_65892,2019-08-16 06:17:57.704049,11112,127744


In [44]:
# Create the delivery dataframe as an independent copy of the orders table
delivery = orders.copy()

# Build a product_id -> main_category lookup from the products table
_category_by_product_id = products.set_index('product_id')['main_category']
product_id_to_category = _category_by_product_id.to_dict()

# Tag every order with the category of the product it refers to
delivery['main_category'] = delivery['product_id'].map(product_id_to_category)

# Print each category's share of all orders, expressed as a percentage
category_share_pct = delivery['main_category'].value_counts(normalize=True) * 100
print(category_share_pct)

main_category
Health & Beauty           11.881383
Men Clothes               11.861655
Women Clothes             11.092225
Mobile & Accessories       9.146647
Baby & Toys                7.463615
Home & Living              6.584918
Groceries & Pets           5.492237
Home Appliances            4.627199
Automotive                 4.225031
Sports & Outdoor           3.780371
Women's Bags               3.158150
Watches                    2.672515
Men's Bags & Wallets       2.362922
Cameras & Drones           2.309805
Women Shoes                2.171703
Muslim Fashion             2.110998
Fashion Accessories        2.038153
Computer & Accessories     1.912191
Games, Books & Hobbies     1.746771
Men Shoes                  1.025906
Gaming & Consoles          0.763359
Travel & Luggage           0.760324
Tickets & Vouchers         0.537235
Others                     0.274688
Name: proportion, dtype: float64


In [45]:
# Generate synthetic delivery times.
# A dedicated, seeded generator makes the simulated values identical on every
# Restart & Run All; change DELIVERY_SEED to draw a different sample.
DELIVERY_SEED = 42
delivery_rng = np.random.default_rng(DELIVERY_SEED)
n_orders = len(orders)

# Roughly 80% of orders are flagged as on-time, the rest as late
is_on_time = delivery_rng.random(n_orders) < 0.8
delivery_times = np.where(
    is_on_time,
    # On-time deliveries: normal around 1.34 days (the tail can still exceed 2 days)
    delivery_rng.normal(loc=1.34, scale=0.3, size=n_orders),
    # Late deliveries: uniform between 2 and 5 days
    delivery_rng.uniform(2, 5, size=n_orders),
)

# Clamp to a 1-day minimum, then round to one decimal place
delivery_times = np.maximum(1, delivery_times).round(1)

# Attach the delivery time and derive the delivered timestamp from the order time
delivery['delivery_time'] = delivery_times
delivery['delivered_date'] = pd.to_datetime(orders['order_time']) + pd.to_timedelta(delivery['delivery_time'], unit='D')

# Delivery cost per main_category; the trailing notes name the courier option
# each fee was taken from.
_DELIVERY_COST_BY_CATEGORY = {
    'Men Clothes': 3.20,           # SmartPac delivery for clothes
    'Women Clothes': 3.20,         # SmartPac delivery for clothes
    'Health & Beauty': 3.30,       # Ninja Packs (XS) for Health & Beauty
    'Mobile & Accessories': 3.98,  # Ninja Packs (M) for Mobile & Accessories
    'Home & Living': 4.30,         # Qxpress Delivery 5kg for Home items
    'Automotive': 6.90,            # Qxpress Delivery 10kg for Automotive
    'Sports & Outdoor': 4.48,      # Ninja Packs (Padded M) for Sports & Outdoor
    'Groceries & Pets': 3.20,      # SmartPac for Groceries & Pets
    'Home Appliances': 5.30,       # Qxpress Delivery 10kg for Home Appliances
    'Women\'s Bags': 3.20,         # SmartPac for Bags
    'Men\'s Bags & Wallets': 3.20, # SmartPac for Bags
    'Watches': 7.30,               # GrabExpress for Watches and Cameras
    'Cameras & Drones': 7.30,      # GrabExpress for Watches and Cameras
}

# Fee used for every category that has no entry in the table above
_DEFAULT_DELIVERY_COST = 3.98


def assign_delivery_cost(row):
    """Return the delivery cost for a single order row.

    Parameters
    ----------
    row : mapping-like
        Any object supporting ``row['main_category']`` (for example a
        DataFrame row or a plain dict). The category value must be hashable.

    Returns
    -------
    float
        The fee listed for the row's category, or 3.98 when the category is
        not listed (this includes missing values such as NaN or None).
    """
    return _DELIVERY_COST_BY_CATEGORY.get(row['main_category'], _DEFAULT_DELIVERY_COST)

# Derive each order's delivery cost from its main_category.
# assign_delivery_cost only reads row['main_category'], so mapping over that one
# column yields the same values as a row-wise DataFrame.apply(axis=1) without
# constructing a Series object for every row. Missing categories are still
# passed to the function (Series.map's default na_action), so they receive the
# function's default cost.
delivery['delivery_cost'] = delivery['main_category'].map(
    lambda category: assign_delivery_cost({'main_category': category})
)

delivery

Unnamed: 0.1,Unnamed: 0,order_id,order_time,product_id,customer_id,main_category,delivery_time,delivered_date,delivery_cost
0,0,order_1,2019-11-11 13:50:53.819019,17393,299,Mobile & Accessories,1.6,2019-11-13 04:14:53.819019,3.98
1,1,order_2,2019-11-27 05:41:28.793190,7725,299,Home Appliances,4.3,2019-12-01 12:53:28.793190,5.30
2,2,order_3,2019-10-19 10:00:48.691390,10948,299,Baby & Toys,1.5,2019-10-20 22:00:48.691390,3.98
3,3,order_4,2019-10-09 18:24:32.264380,19083,299,Baby & Toys,1.4,2019-10-11 04:00:32.264380,3.98
4,4,order_5,2019-10-29 04:41:47.170388,7954,299,Men Clothes,1.5,2019-10-30 16:41:47.170388,3.20
...,...,...,...,...,...,...,...,...,...
65888,65888,order_65889,2019-03-18 20:34:28.868262,10328,127744,Baby & Toys,1.2,2019-03-20 01:22:28.868262,3.98
65889,65889,order_65890,2019-03-15 09:41:12.357500,1794,127744,Women Clothes,1.1,2019-03-16 12:05:12.357500,3.20
65890,65890,order_65891,2019-05-05 03:52:57.607549,8767,127744,Baby & Toys,1.5,2019-05-06 15:52:57.607549,3.98
65891,65891,order_65892,2019-08-16 06:17:57.704049,11112,127744,Women Clothes,2.2,2019-08-18 11:05:57.704049,3.20


In [46]:
# Percentage of deliveries that took more than 2 days (True) versus 2 days or less (False)
delivery['delivery_time'].gt(2).value_counts(normalize=True).mul(100)

delivery_time
False    79.436359
True     20.563641
Name: proportion, dtype: float64

In [47]:
# Percentage of delivery rows falling into each main_category
delivery['main_category'].value_counts(normalize=True).mul(100)

main_category
Health & Beauty           11.881383
Men Clothes               11.861655
Women Clothes             11.092225
Mobile & Accessories       9.146647
Baby & Toys                7.463615
Home & Living              6.584918
Groceries & Pets           5.492237
Home Appliances            4.627199
Automotive                 4.225031
Sports & Outdoor           3.780371
Women's Bags               3.158150
Watches                    2.672515
Men's Bags & Wallets       2.362922
Cameras & Drones           2.309805
Women Shoes                2.171703
Muslim Fashion             2.110998
Fashion Accessories        2.038153
Computer & Accessories     1.912191
Games, Books & Hobbies     1.746771
Men Shoes                  1.025906
Gaming & Consoles          0.763359
Travel & Luggage           0.760324
Tickets & Vouchers         0.537235
Others                     0.274688
Name: proportion, dtype: float64

In [48]:
# Give every delivery row a sequential identifier starting at 1
delivery['delivery_id'] = np.arange(len(delivery)) + 1
delivery

Unnamed: 0.1,Unnamed: 0,order_id,order_time,product_id,customer_id,main_category,delivery_time,delivered_date,delivery_cost,delivery_id
0,0,order_1,2019-11-11 13:50:53.819019,17393,299,Mobile & Accessories,1.6,2019-11-13 04:14:53.819019,3.98,1
1,1,order_2,2019-11-27 05:41:28.793190,7725,299,Home Appliances,4.3,2019-12-01 12:53:28.793190,5.30,2
2,2,order_3,2019-10-19 10:00:48.691390,10948,299,Baby & Toys,1.5,2019-10-20 22:00:48.691390,3.98,3
3,3,order_4,2019-10-09 18:24:32.264380,19083,299,Baby & Toys,1.4,2019-10-11 04:00:32.264380,3.98,4
4,4,order_5,2019-10-29 04:41:47.170388,7954,299,Men Clothes,1.5,2019-10-30 16:41:47.170388,3.20,5
...,...,...,...,...,...,...,...,...,...,...
65888,65888,order_65889,2019-03-18 20:34:28.868262,10328,127744,Baby & Toys,1.2,2019-03-20 01:22:28.868262,3.98,65889
65889,65889,order_65890,2019-03-15 09:41:12.357500,1794,127744,Women Clothes,1.1,2019-03-16 12:05:12.357500,3.20,65890
65890,65890,order_65891,2019-05-05 03:52:57.607549,8767,127744,Baby & Toys,1.5,2019-05-06 15:52:57.607549,3.98,65891
65891,65891,order_65892,2019-08-16 06:17:57.704049,11112,127744,Women Clothes,2.2,2019-08-18 11:05:57.704049,3.20,65892


In [49]:
# Number the distinct seller names 1..N, in the order Series.unique() returns them
unique_seller_names = products['seller_name'].unique()
seller_name_to_shop_id = dict(zip(unique_seller_names, range(1, len(unique_seller_names) + 1)))

# Store the numeric id on the products dataframe as the seller_id column
products['seller_id'] = products['seller_name'].map(seller_name_to_shop_id)

# Display the updated products dataframe
products

Unnamed: 0.1,Unnamed: 0,price_ori,item_category_detail,title,item_rating,seller_name,price_actual,total_rating,total_sold,favorite,desc,Stock,Ships_From,product_id,main_category,mid_category,sub_category,seller_id
0,0,8.034430,Shopee | Women's Bags | Shoulder Bags | Should...,1081 PU SLING BAG KULIT leather Casual Beg Tan...,4.9,qqfwholesale,4.017215,179.0,179.0,128.0,🎉 WELCOME To QQF WHOLESALE \r\n😍 ALL item read...,2906,Malaysia,1,Women's Bags,Shoulder Bags,Shoulder Bags,1
1,1,0.439091,Shopee | Home Appliances | Sewing Machines | S...,Stat Chinese Traditional Frog Buttons Knot Che...,5.0,stationery.my,0.233559,17.0,17.0,19.0,Chinese Traditional Frog Buttons Knot Cheongsa...,31701,China,2,Home Appliances,Sewing Machines,Sewing Machines and Accessories,2
2,2,2.647002,Shopee | Women's Bags | Handbags | Set Handbags,N09 READY STOCK MYFOOYIN woman handbag set 2in...,4.9,fooyin88,2.647002,8100.0,8100.0,21500.0,READY STOCK\nMEASUREMENT:\nLONG 26CM X HEIGHT ...,2044,Malaysia,3,Women's Bags,Handbags,Set Handbags,3
3,3,2.709284,Shopee | Women Clothes | Maternity Wear | Inne...,🔥Shocking Sale🔥Cotton Comfort Nursing Bra Brea...,4.9,vivi2002,2.709284,6500.0,6500.0,3400.0,PRODUCT DESCRIPTION\nMaterial:95%Cotton+5%Span...,386,Malaysia,4,Women Clothes,Maternity Wear,Innerwear,4
4,4,1245.336605,Shopee | Cameras & Drones | DSLR Cameras | DSLRs,Canon EOS 70D 18-55mm IS STM Original+64gb+ext...,5.0,xpro.gadget,852.957480,10.0,10.0,132.0,Canon 70D\nHighlights\nEF-S 18-55mm f/3.5-5.6 ...,36,Malaysia,5,Cameras & Drones,DSLR Cameras,DSLRs,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20307,20307,15.757447,Shopee | Baby & Toys | Girls Fashion | Girl Shoes,Elegant All-match Girls Leather Flat Shoes Pea...,4.9,amour.my,9.454468,219.0,219.0,1600.0,1 cm = 0.3937 inch \r\n-----------------------...,19076,China,20308,Baby & Toys,Girls Fashion,Girl Shoes,1526
20308,20308,9.965184,Shopee | Baby & Toys | Girls Fashion | Girl Dr...,Baju Raya kids 2023(v1) | Shopee Malaysia,4.8,mazu9,9.965184,215.0,215.0,304.0,M G FASHION\n-stylist fashionable\n-comfortab...,9406,Malaysia,20309,Baby & Toys,Girls Fashion,Girl Dresses,9226
20309,20309,1.550832,Shopee | Muslim Fashion | Kain Pasang & Songke...,Kain como crepe heavy premium (harga untk 0.5m...,4.7,nurlainafashionzone,0.775416,152.0,152.0,98.0,Harga utk 0.5m@setengah meter / HARGA BUKAN U...,446287,Malaysia,20310,Muslim Fashion,Kain Pasang & Songket,Kain Pasang Cotton,3749
20310,20310,0.093424,Shopee | Baby & Toys | Others | Others,Doorgift Bag Birthday Kids Drawstring Organize...,4.9,pbs_empire,0.093424,16.0,16.0,27.0,Custom made Ready Stock in Malaysia\nUnicorn S...,3695,Malaysia,20311,Baby & Toys,Others,Others,9227
