In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [13]:
def load_data(file_path, **read_csv_kwargs):
    """Load a CSV dataset into a pandas DataFrame.

    Parameters
    ----------
    file_path : str, path-like, or file-like object
        Location of the CSV data; handed unchanged to ``pandas.read_csv``.
    **read_csv_kwargs
        Optional keyword arguments forwarded to ``pandas.read_csv``
        (for example ``parse_dates``, ``dtype`` or ``usecols``). When none
        are given the file is parsed with pandas' defaults.

    Returns
    -------
    pandas.DataFrame
        The parsed table.
    """
    return pd.read_csv(file_path, **read_csv_kwargs)

In [14]:
# Read every source table in one pass; the order of the paths below matches
# the order of the names being assigned.
(
    daily_inventory,
    inventory_snapshot,
    products,
    purchase_orders,
    sales,
    suppliers,
) = [
    load_data(csv_path)
    for csv_path in (
        '../data/daily_inventory.csv',
        '../data/inventory_snapshot.csv',
        '../data/products.csv',
        '../data/purchase_orders.csv',
        '../data/sales.csv',
        '../data/suppliers.csv',
    )
]

In [15]:
# Report the per-column count of missing values for every loaded table.
datasets = dict(
    daily_inventory=daily_inventory,
    inventory_snapshot=inventory_snapshot,
    products=products,
    purchase_orders=purchase_orders,
    sales=sales,
    suppliers=suppliers,
)
for name in datasets:
    dataset = datasets[name]
    # isna() flags missing cells; summing the flags counts them per column.
    missing_values = dataset.isna().sum()
    print(f"Missing values in {name} dataset:\n{missing_values}\n")

Missing values in daily_inventory dataset:
snapshot_date           0
sku_id                  0
current_stock           0
daily_sales             0
incoming_stock          0
warehouse_stock         0
retail_stock            0
amazon_allocated        0
tiktokshop_allocated    0
zalora_allocated        0
reorder_point           0
safety_stock            0
dtype: int64

Missing values in inventory_snapshot dataset:
snapshot_date           0
sku_id                  0
current_stock           0
incoming_stock          0
stock_age_days          0
warehouse_stock         0
retail_stock            0
amazon_allocated        0
tiktokshop_allocated    0
zalora_allocated        0
reorder_point           0
safety_stock            0
backorder_qty           0
opening_buffer          0
dtype: int64

Missing values in products dataset:
sku_id                  0
product_name            0
category                0
sub_category            0
brand                   0
product_type            0
size_label     

In [19]:
# fillna returns a new Series and leaves its input untouched, so each result
# is assigned back to its column; otherwise the imputation is only displayed
# and `products` keeps its missing values. Column assignment updates the
# existing DataFrame object, so other references to it see the filled values.
products['parent_sku'] = products['parent_sku'].fillna('No Parent')
# Impute with the column median (computed over the non-missing values).
products['shelf_life_months'] = products['shelf_life_months'].fillna(
    products['shelf_life_months'].median()
)
# Last expression: show the filled column as the cell output.
products['shelf_life_months']

0     18.0
1     24.0
2     24.0
3     24.0
4     24.0
      ... 
75    24.0
76    24.0
77    24.0
78    24.0
79    12.0
Name: shelf_life_months, Length: 80, dtype: float64

In [20]:
# fillna returns a new Series and leaves its input untouched, so the result is
# assigned back to the column; otherwise the fill is only displayed and
# `sales` keeps its missing event names.
sales['event_name'] = sales['event_name'].fillna('No Event')
# Last expression: show the filled column as the cell output.
sales['event_name']

0                    No Event
1                    No Event
2                    No Event
3                    No Event
4                    No Event
                 ...         
160126               No Event
160127               No Event
160128               No Event
160129               No Event
160130    12.12 Year-End Sale
Name: event_name, Length: 160131, dtype: object

In [22]:
# dropna returns a new DataFrame and leaves its input untouched, so the name is
# rebound to the result; otherwise the rows containing missing values are only
# hidden in this cell's output and remain in `purchase_orders` for later cells.
purchase_orders = purchase_orders.dropna()
# Keep the name -> frame lookup built earlier pointing at the filtered frame.
datasets["purchase_orders"] = purchase_orders
# Last expression: show the filtered frame as the cell output.
purchase_orders

Unnamed: 0,po_id,sku_id,supplier_id,po_date,promised_delivery_date,delivery_date,order_qty,unit_cost,shipping_mode,status,incoterm,currency,freight_cost,duty_cost
0,PO03732421,SKU9310518635,200139,2019-05-22,2019-06-12,2019-06-18,106,13.65,Air,delivered,EXW,EUR,7.01,10.44
1,PO72647743,SKU9310518635,200139,2019-06-03,2019-06-24,2019-06-27,159,11.37,Air,delivered,EXW,EUR,25.97,17.82
2,PO41745551,SKU9310518635,200139,2019-09-03,2019-09-24,2019-09-19,154,12.03,Air,delivered,EXW,EUR,8.51,16.78
3,PO95292145,SKU9310518635,200139,2019-12-04,2019-12-25,2019-12-21,94,14.15,Air,delivered,EXW,EUR,18.91,6.70
4,PO94595741,SKU9310518635,200139,2020-03-04,2020-03-25,2020-03-25,90,11.42,Air,delivered,EXW,EUR,20.69,6.11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1139,PO84747903,SKU0207806619,200439,2019-09-03,2019-09-24,2019-09-26,80,15.73,Sea,delivered,CIF,USD,9.14,10.98
1140,PO78000242,SKU0207806619,200439,2019-12-04,2019-12-25,2019-12-25,80,17.21,Sea,delivered,FOB,USD,13.00,8.12
1141,PO30534454,SKU0207806619,200439,2020-03-04,2020-03-25,2020-03-24,80,15.49,Sea,delivered,DAP,USD,10.40,6.96
1142,PO82682604,SKU0207806619,200439,2020-06-03,2020-06-24,2020-06-24,80,17.45,Sea,delivered,CIF,USD,14.00,8.11


In [23]:
# Display purchase_orders to inspect its current contents.
purchase_orders

Unnamed: 0,po_id,sku_id,supplier_id,po_date,promised_delivery_date,delivery_date,order_qty,unit_cost,shipping_mode,status,incoterm,currency,freight_cost,duty_cost
0,PO03732421,SKU9310518635,200139,2019-05-22,2019-06-12,2019-06-18,106,13.65,Air,delivered,EXW,EUR,7.01,10.44
1,PO72647743,SKU9310518635,200139,2019-06-03,2019-06-24,2019-06-27,159,11.37,Air,delivered,EXW,EUR,25.97,17.82
2,PO41745551,SKU9310518635,200139,2019-09-03,2019-09-24,2019-09-19,154,12.03,Air,delivered,EXW,EUR,8.51,16.78
3,PO95292145,SKU9310518635,200139,2019-12-04,2019-12-25,2019-12-21,94,14.15,Air,delivered,EXW,EUR,18.91,6.70
4,PO94595741,SKU9310518635,200139,2020-03-04,2020-03-25,2020-03-25,90,11.42,Air,delivered,EXW,EUR,20.69,6.11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1139,PO84747903,SKU0207806619,200439,2019-09-03,2019-09-24,2019-09-26,80,15.73,Sea,delivered,CIF,USD,9.14,10.98
1140,PO78000242,SKU0207806619,200439,2019-12-04,2019-12-25,2019-12-25,80,17.21,Sea,delivered,FOB,USD,13.00,8.12
1141,PO30534454,SKU0207806619,200439,2020-03-04,2020-03-25,2020-03-24,80,15.49,Sea,delivered,DAP,USD,10.40,6.96
1142,PO82682604,SKU0207806619,200439,2020-06-03,2020-06-24,2020-06-24,80,17.45,Sea,delivered,CIF,USD,14.00,8.11
