In [1]:
import random
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from faker import Faker

In [2]:
fake = Faker("en_IN")  # Indian locale (names, cities, states)

# Dataset sizes read by the generator cells.
NUM_CUSTOMERS = 50
NUM_PRODUCTS = 30
NUM_ORDERS = 100

# One seed for every random source the notebook draws from, so that
# Restart Kernel -> Run All regenerates the same data. The stdlib `random`
# module, numpy and Faker each keep their own generator state; seeding one
# does not seed the others.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
Faker.seed(SEED)

In [None]:
## Customers

# Re-seed at the top of the cell so that re-running it reproduces the same
# rows. Every value below comes from Faker, which keeps its own generator:
# np.random.seed() alone leaves all fake.* calls unseeded.
# Note: signup_date is drawn relative to the run date, so the absolute dates
# still move with the day the notebook is executed.
np.random.seed(42)
Faker.seed(42)

customers = []
used_emails = set()
for i in range(NUM_CUSTOMERS):
    customer_id = i + 1
    name = fake.name()

    # The email is derived from the name, so two customers who share a name
    # would also share an address. Disambiguate repeats with the customer id.
    local_part = name.lower().replace(" ", ".")
    email = local_part + "@gmail.com"
    if email in used_emails:
        email = f"{local_part}{customer_id}@gmail.com"
    used_emails.add(email)

    # 10-digit number whose first digit is 6-9.
    phone = str(fake.random_int(min=6000000000, max=9999999999))

    customers.append({
        "customer_id": customer_id,
        "name": name,
        "email": email,
        "phone": phone,
        "city": fake.city(),
        "state": fake.state(),
        "signup_date": fake.date_between(start_date='-2y', end_date='today').strftime("%Y-%m-%d")
    })

df_customers = pd.DataFrame(customers)
df_customers.to_csv("customers.csv", index=False)


In [None]:
## Products

# The loop below draws from the stdlib `random` module, which
# np.random.seed() does not affect. Seed it directly so the catalogue is the
# same on every run of this cell.
np.random.seed(42)
random.seed(42)

categories = ["Electronics", "Clothing", "Accessories", "Home", "Books"]

category_items = {
    "Electronics": ["Bluetooth Speaker", "Smartphone Charger", "Headphones", "Smartwatch", "Laptop Sleeve"],
    "Clothing": ["Cotton T-Shirt", "Denim Jeans", "Hoodie", "Jacket", "Sneakers"],
    "Accessories": ["Leather Wallet", "Sunglasses", "Wristband", "Backpack", "Belt"],
    "Home": ["Ceramic Vase", "LED Lamp", "Wall Clock", "Kitchen Knife", "Cushion Cover"],
    "Books": ["Python Programming", "Data Science Handbook", "Machine Learning Guide", "Novel", "Biography"]
}

# NOTE: category and name are drawn with replacement, so different
# product_ids can end up with the same name and category.
products = []
for i in range(NUM_PRODUCTS):
    category = random.choice(categories)
    name = random.choice(category_items[category])
    price = round(random.uniform(100, 5000))  # whole-number price
    stock = random.randint(10, 500)
    products.append({
        "product_id": i + 1,
        "name": name,
        "category": category,
        "price": price,
        "stock": stock
    })

df_products = pd.DataFrame(products)
df_products.to_csv("products.csv", index=False)



In [None]:
## Orders
#
# Builds four related tables in one pass: orders, order_items, payments and
# shipments. Every order gets exactly one payment row and one shipment row.

# Seed every generator this cell draws from. The loop uses the stdlib `random`
# module and Faker; neither is affected by np.random.seed(), so seeding numpy
# alone left the output different on every run.
# Note: order dates are drawn relative to "now", so absolute timestamps still
# move with the moment the notebook is executed.
np.random.seed(42)
random.seed(42)
Faker.seed(42)

# Same timestamp layout for every datetime column written by this cell.
csv_datetime_format = "%Y-%m-%d %H:%M:%S"

order_statuses = ["Pending", "Shipped", "Delivered", "Cancelled"]
status_weights = [5, 10, 80, 5]
payment_modes = ["UPI", "Card", "COD", "NetBanking"]

# product_id -> price, built once instead of filtering df_products per item.
price_by_product = df_products.set_index("product_id")["price"].to_dict()

orders = []
order_items = []
payments = []
shipments = []

for i in range(NUM_ORDERS):
    order_id = i + 1
    customer_id = random.randint(1, NUM_CUSTOMERS)
    order_date = fake.date_time_between(start_date='-1y', end_date='now')
    status = random.choices(order_statuses, weights=status_weights)[0]

    orders.append({
        "order_id": order_id,
        "customer_id": customer_id,
        "order_date": order_date,
        "status": status
    })

    # Order items: 1-5 distinct products per order. Sampling without
    # replacement keeps a product from appearing on two lines of the same
    # order (repeat purchases are expressed through `quantity`).
    num_items = random.randint(1, min(5, NUM_PRODUCTS))
    amount = 0  # running order total, reused for the payment row
    for product_id in random.sample(range(1, NUM_PRODUCTS + 1), num_items):
        quantity = random.randint(1, 3)
        total_price = round(price_by_product[product_id] * quantity, 2)
        amount += total_price
        order_items.append({
            "order_item_id": len(order_items) + 1,
            "order_id": order_id,
            "product_id": product_id,
            "quantity": quantity,
            "total_price": total_price
        })

    # Payment: one per order, dated 0-2 days after the order.
    payments.append({
        "payment_id": order_id,
        "order_id": order_id,
        "payment_mode": random.choice(payment_modes),
        "payment_date": order_date + timedelta(days=random.randint(0, 2)),
        "amount": amount
    })

    # Shipment: shipped 0-3 days after the order, delivered 1-7 days later.
    # NOTE(review): a shipment row is produced for every order, including
    # "Pending" and "Cancelled" ones, and any non-delivered order is randomly
    # marked "In Transit" or "Returned". Confirm this is intended.
    shipped_date = order_date + timedelta(days=random.randint(0, 3))
    delivery_date = shipped_date + timedelta(days=random.randint(1, 7))
    if status == "Delivered":
        delivery_status = "Delivered"
    else:
        delivery_status = random.choice(["In Transit", "Returned"])

    shipments.append({
        "shipment_id": order_id,
        "order_id": order_id,
        "shipped_date": shipped_date.strftime(csv_datetime_format),
        "delivery_date": delivery_date.strftime(csv_datetime_format),
        "delivery_status": delivery_status
    })

df_orders = pd.DataFrame(orders)
df_order_items = pd.DataFrame(order_items)
df_payments = pd.DataFrame(payments)
df_shipments = pd.DataFrame(shipments)

# order_date / payment_date stay real datetimes in memory; date_format makes
# their CSV text match the shipment timestamps formatted above.
df_orders.to_csv("orders.csv", index=False, date_format=csv_datetime_format)
df_order_items.to_csv("order_items.csv", index=False)
df_payments.to_csv("payments.csv", index=False, date_format=csv_datetime_format)
df_shipments.to_csv("shipments.csv", index=False)