In [1]:
import pandas as pd 
import numpy as np 
import random
from datetime import datetime,timedelta

In [5]:
# -----------------------
# Configuration
# -----------------------
np.random.seed(42)
random.seed(42)

NUM_CUSTOMERS = 500
NUM_PRODUCTS = 60
NUM_ORDERS = 10000
NUM_ORDER_ITEMS = 25000

START_SIGNUP_DATE = datetime(2022, 1, 1)
START_ORDER_DATE = datetime(2023, 1, 1)

In [23]:
# -----------------------
# Customers Table
# ------------------------
customers = pd.DataFrame({
    "customer_id": range(1, NUM_CUSTOMERS + 1),   # ✅ numeric ID
    "customer_name": [f"Customer_{i}" for i in range(1, NUM_CUSTOMERS + 1)],  # ✅ name column
    "region": np.random.choice(
        ["North", "South", "East", "West"],
        size=NUM_CUSTOMERS
    ),
    "signup_date": [
        START_SIGNUP_DATE + timedelta(days=random.randint(0, 700))
        for _ in range(NUM_CUSTOMERS)
    ]
})


# -----------------------
# Products Table
# -----------------------
products = pd.DataFrame({
    "product_id":range(1, NUM_PRODUCTS + 1),
    "product_name": [f"Product_{i}" for i in range(1, NUM_PRODUCTS + 1)],
    "category":np.random.choice(
        ["Electronics", "Furniture", "Clothing", "Accessories"],
        size = NUM_PRODUCTS
    ),
    "cost_price": np.random.randint(200, 30000, size=NUM_PRODUCTS)
})

# -----------------------
# Orders Table
# -----------------------
orders = pd.DataFrame({
    "order_id": range(1, NUM_ORDERS + 1),
    "customer_id": np.random.randint(1, NUM_CUSTOMERS + 1, size=NUM_ORDERS),
    "order_date": [
        START_ORDER_DATE + timedelta(days=random.randint(0, 365))
        for _ in range(NUM_ORDERS)
    ]
})

# -----------------------
# Order Items Table
# -----------------------
order_items = pd.DataFrame({
    "order_item_id": range(1, NUM_ORDER_ITEMS + 1),
    "order_id": np.random.randint(1, NUM_ORDERS + 1, size=NUM_ORDER_ITEMS),
    "product_id": np.random.randint(1, NUM_PRODUCTS + 1, size=NUM_ORDER_ITEMS),
    "quantity": np.random.randint(1, 5, size=NUM_ORDER_ITEMS)
})

# Add selling price (cost + margin)
order_items = order_items.merge(
    products[["product_id", "cost_price"]],
    on="product_id",
    how="left"
)

order_items["price"] = (
    order_items["cost_price"]
    * np.random.uniform(1.10, 1.60, size=NUM_ORDER_ITEMS)
).round(2)

order_items = order_items[[
    "order_item_id",
    "order_id",
    "product_id",
    "quantity",
    "price"
]]
    
    
    

In [25]:
customers.head()

Unnamed: 0,customer_id,customer_name,region,signup_date
0,1,Customer_1,South,2022-08-18
1,2,Customer_2,South,2022-02-18
2,3,Customer_3,South,2022-10-12
3,4,Customer_4,West,2023-03-07
4,5,Customer_5,West,2023-06-30


In [13]:
products.head()

Unnamed: 0,product_id,product_name,category,cost_price
0,1,Product_1,Clothing,25747
1,2,Product_2,Electronics,22586
2,3,Product_3,Electronics,23198
3,4,Product_4,Accessories,25942
4,5,Product_5,Clothing,13603


In [15]:
orders.head()

Unnamed: 0,order_id,customer_id,order_date
0,1,229,2023-10-23
1,2,474,2023-04-08
2,3,117,2023-05-11
3,4,318,2023-01-23
4,5,279,2023-12-29


In [17]:
order_items.head()

Unnamed: 0,order_item_id,order_id,product_id,quantity,price
0,1,3558,21,1,7875.96
1,2,3251,48,1,5427.09
2,3,8062,39,1,30457.88
3,4,8610,3,4,35958.79
4,5,2604,29,3,20033.7


In [27]:
# -----------------------
# Save CSV Files
# -----------------------
customers.to_csv("C:/Users/Abhi/Desktop/Projects/sql-business-qa/Data/customers_01.csv", index=False)
products.to_csv("C:/Users/Abhi/Desktop/Projects/sql-business-qa/Data/products_01.csv", index=False)
orders.to_csv("C:/Users/Abhi/Desktop/Projects/sql-business-qa/Data/orders_01.csv", index=False)
order_items.to_csv("C:/Users/Abhi/Desktop/Projects/sql-business-qa/Data/order_items_01.csv", index=False)