In [3]:
# ==============================================================
# Supply Chain Project - Week 4: Simple ETL with Pandas & NumPy
# ==============================================================

# Step 1: Import libraries
import pandas as pd
import numpy as np
import os

# Step 2: Create folder structure
os.makedirs("SupplyChain_Week4/Dataset", exist_ok=True)
os.makedirs("SupplyChain_Week4/Deliverables", exist_ok=True)

# Step 3: Create Week 1–3 datasets inside Dataset folder

# orders.csv
orders_df = pd.DataFrame({
    "order_id": [1, 2, 3, 4, 5],
    "customer_id": [1, 2, 3, 4, 5],
    "order_date": ["2025-07-10", "2025-07-11", "2025-07-12", "2025-07-13", "2025-07-14"],
    "delivery_date": ["2025-07-15", "2025-07-20", "2025-07-17", "", "2025-07-22"],
    "total_amount": [1500, 2300, 1800, 2100, 2500],
    "supplier_id": [101, 102, 103, 104, 105]
})
orders_df.to_csv("SupplyChain_Week4/Dataset/orders.csv", index=False)

# suppliers.csv
suppliers_df = pd.DataFrame({
    "supplier_id": [101, 102, 103, 104, 105],
    "supplier_name": ["Alpha Supplies", "Beta Traders", "Gamma Exports", "Delta Goods", "Epsilon Ltd"],
    "contact_email": [
        "alpha@supplies.com",
        "beta@traders.com",
        "gamma@exports.com",
        "delta@goods.com",
        "epsilon@ltd.com"
    ],
    "region": ["North", "South", "East", "West", "North"]
})
suppliers_df.to_csv("SupplyChain_Week4/Dataset/suppliers.csv", index=False)

# inventory.csv
inventory_df = pd.DataFrame({
    "inventory_id": [501, 502, 503, 504, 505],
    "product_name": ["Laptops", "Mobiles", "Tablets", "Printers", "Accessories"],
    "stock_quantity": [50, 100, 60, 40, 150],
    "supplier_id": [101, 102, 103, 104, 105],
    "last_restock_date": ["2025-07-05", "2025-07-07", "2025-07-09", "2025-07-11", "2025-07-13"]
})
inventory_df.to_csv("SupplyChain_Week4/Dataset/inventory.csv", index=False)

print(" Dataset files created inside SupplyChain_Week4/Dataset")

# Step 4: Load the datasets back
orders_df = pd.read_csv("SupplyChain_Week4/Dataset/orders.csv")
suppliers_df = pd.read_csv("SupplyChain_Week4/Dataset/suppliers.csv")
inventory_df = pd.read_csv("SupplyChain_Week4/Dataset/inventory.csv")

# Step 5: Data cleaning & transformation
orders_df['order_date'] = pd.to_datetime(orders_df['order_date'], errors='coerce')
orders_df['delivery_date'] = pd.to_datetime(orders_df['delivery_date'], errors='coerce')
inventory_df['last_restock_date'] = pd.to_datetime(inventory_df['last_restock_date'], errors='coerce')

orders_df['delivery_date'] = orders_df['delivery_date'].fillna(pd.Timestamp.today())
orders_df['delay_days'] = (orders_df['delivery_date'] - orders_df['order_date']).dt.days
orders_df['is_delayed'] = np.where(orders_df['delay_days'] > 3, 1, 0)

# Step 6: Analysis
delayed_orders_supplier = orders_df.groupby('supplier_id')['is_delayed'].sum().reset_index()
avg_stock_supplier = inventory_df.groupby('supplier_id')['stock_quantity'].mean().reset_index()

# Step 7: Save cleaned datasets
orders_df.to_csv("SupplyChain_Week4/Deliverables/cleaned_orders.csv", index=False)
suppliers_df.to_csv("SupplyChain_Week4/Deliverables/cleaned_suppliers.csv", index=False)
inventory_df.to_csv("SupplyChain_Week4/Deliverables/cleaned_inventory.csv", index=False)

# Step 8: Save analysis outputs
delayed_orders_supplier.to_csv("SupplyChain_Week4/Deliverables/delayed_orders_by_supplier.csv", index=False)
avg_stock_supplier.to_csv("SupplyChain_Week4/Deliverables/avg_stock_by_supplier.csv", index=False)

# Step 9: Display samples
print("=== Orders with Delay Flag ===")
print(orders_df[['order_id','supplier_id','order_date','delivery_date','delay_days','is_delayed']].head())

print("\n=== Delayed Orders by Supplier ===")
print(delayed_orders_supplier.head())

print("\n=== Average Stock by Supplier ===")
print(avg_stock_supplier.head())


 Dataset files created inside SupplyChain_Week4/Dataset
=== Orders with Delay Flag ===
   order_id  supplier_id order_date              delivery_date  delay_days  \
0         1          101 2025-07-10 2025-07-15 00:00:00.000000           5   
1         2          102 2025-07-11 2025-07-20 00:00:00.000000           9   
2         3          103 2025-07-12 2025-07-17 00:00:00.000000           5   
3         4          104 2025-07-13 2025-08-19 17:52:59.729516          37   
4         5          105 2025-07-14 2025-07-22 00:00:00.000000           8   

   is_delayed  
0           1  
1           1  
2           1  
3           1  
4           1  

=== Delayed Orders by Supplier ===
   supplier_id  is_delayed
0          101           1
1          102           1
2          103           1
3          104           1
4          105           1

=== Average Stock by Supplier ===
   supplier_id  stock_quantity
0          101            50.0
1          102           100.0
2          103        