In [2]:
pip install faker

Collecting faker
  Downloading faker-37.8.0-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.8.0-py3-none-any.whl (2.0 MB)
   ---------------------------------------- 0.0/2.0 MB ? eta -:--:--
   ----- ---------------------------------- 0.3/2.0 MB ? eta -:--:--
   --------------------- ------------------ 1.0/2.0 MB 4.0 MB/s eta 0:00:01
   ------------------------------------- -- 1.8/2.0 MB 4.0 MB/s eta 0:00:01
   ---------------------------------------- 2.0/2.0 MB 3.9 MB/s eta 0:00:00
Installing collected packages: faker
Successfully installed faker-37.8.0
Note: you may need to restart the kernel to use updated packages.


In [7]:
from faker import Faker
import random
import csv
from datetime import datetime, timedelta

# Fixed seed so that a fresh "Restart & Run All" regenerates the same fake
# records. Dates requested relative to "today" still move with the run date.
SEED = 42
random.seed(SEED)  # stdlib generator used for ids, prices, quantities, statuses
Faker.seed(SEED)   # generator used by Faker providers (names, emails, dates, ...)

fake = Faker()

# Number of rows to generate for each table.
NUM_CUSTOMERS = 1000
NUM_CATEGORIES = 20
NUM_PRODUCTS = 2000
NUM_ORDERS = 3000
NUM_ADDRESSES = 1500
NUM_SHIPPINGS = 3000
NUM_PAYMENTS = 3000

In [8]:

# ----------------------------
# 1. Customers
# ----------------------------
# Build one row per customer id in 1..NUM_CUSTOMERS. The field order inside
# each row matches the CSV header written below. Emails go through
# `fake.unique`, so no two customers share an address.
customers = [
    [
        customer_id,
        fake.name(),
        fake.unique.email(),
        fake.phone_number(),
        fake.date_between(start_date="-2y", end_date="today"),
    ]
    for customer_id in range(1, NUM_CUSTOMERS + 1)
]

# Header first, then every customer row, in a single writerows call.
customer_columns = ["customer_id", "customer_name", "email", "phone", "created_at"]
with open("customers.csv", "w", newline="") as customers_file:
    csv.writer(customers_file).writerows([customer_columns, *customers])


In [19]:
# ----------------------------
# 2. Addresses
# ----------------------------
# Generates NUM_ADDRESSES address rows, each owned by a randomly chosen
# customer, and writes them to addresses.csv.

excluded_countries = {"Israel"}
addresses = []
for i in range(1, NUM_ADDRESSES + 1):
    # Re-draw until the country is not in the exclusion set.
    country = fake.country()
    while country in excluded_countries:
        country = fake.country()

    addresses.append([
        i,
        fake.street_address(),
        fake.city(),
        country,
        fake.postcode(),
        # Foreign key into customers.csv: draw from the configured customer
        # range rather than a literal 1000, so the ids stay valid (and cover
        # every customer) if NUM_CUSTOMERS is changed.
        random.randint(1, NUM_CUSTOMERS),
    ])

with open("addresses.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["address_id", "street", "city", "country", "zip_code", "customer_id"])
    writer.writerows(addresses)


In [17]:
# ----------------------------
# 3. Categories
# ----------------------------
# Fixed catalogue of category names; ids are assigned by position (1-based).
categories_list = [
    "Electronics",
    "Books",
    "Clothing",
    "Home Appliances",
    "Sports",
    "Toys",
    "Beauty & Personal Care",
    "Health",
    "Automotive",
    "Jewelry",
    "Shoes",
    "Groceries",
    "Music",
    "Movies",
    "Garden & Outdoors",
    "Pet Supplies",
    "Office Supplies",
    "Tools & Hardware",
    "Video Games",
    "Baby Products"
]

# Take the first NUM_CATEGORIES names. Indexing (not slicing) is deliberate:
# asking for more categories than there are names raises IndexError.
categories = []
for category_id in range(1, NUM_CATEGORIES + 1):
    categories.append([category_id, categories_list[category_id - 1]])

category_columns = ["category_id", "category_name"]
with open("categories.csv", "w", newline="") as categories_file:
    csv_out = csv.writer(categories_file)
    csv_out.writerow(category_columns)
    csv_out.writerows(categories)

In [12]:
# ----------------------------
# 4. Products
# ----------------------------
# One row per product id in 1..NUM_PRODUCTS. Each field is drawn into a named
# variable, in the same order as the CSV columns, before the row is stored.
products = []
for product_no in range(1, NUM_PRODUCTS + 1):
    first_word = fake.word().capitalize()
    second_word = fake.word().capitalize()
    product_name = first_word + " " + second_word  # two capitalised random words
    description = fake.sentence(nb_words=6)
    price = round(random.uniform(50, 3000), 2)
    stock_quantity = random.randint(10, 500)
    category_id = random.randint(1, NUM_CATEGORIES)  # links to categories.csv
    products.append(
        [product_no, product_name, description, price, stock_quantity, category_id]
    )

product_columns = ["product_id", "product_name", "description", "price", "stock_quantity", "category_id"]
with open("products.csv", "w", newline="") as products_file:
    csv.writer(products_file).writerows([product_columns, *products])



In [13]:
# ----------------------------
# 5. Orders
# ----------------------------
# One row per order id in 1..NUM_ORDERS: a date within the last year, a random
# status, a random total and a random owning customer.
order_statuses = ["Pending", "Delivered", "Cancelled"]

orders = []
for order_no in range(1, NUM_ORDERS + 1):
    placed_on = fake.date_between(start_date="-1y", end_date="today")
    order_state = random.choice(order_statuses)
    total_amount = round(random.uniform(100, 5000), 2)
    buyer_id = random.randint(1, NUM_CUSTOMERS)  # links to customers.csv
    orders.append([order_no, placed_on, order_state, total_amount, buyer_id])

order_columns = ["order_id", "order_date", "status", "total_amount", "customer_id"]
with open("orders.csv", "w", newline="") as orders_file:
    csv_out = csv.writer(orders_file)
    csv_out.writerow(order_columns)
    csv_out.writerows(orders)



In [20]:
# ----------------------------
# 6. Order Details
# ----------------------------
# For every order, attempt between 1 and 4 line items. A (product_id, order_id)
# pair may appear only once; a repeated draw is skipped rather than retried,
# so an order can end up with fewer lines than attempts.
order_details_set = set()   # (product_id, order_id) pairs already emitted
order_details = []

for order_id in range(1, NUM_ORDERS + 1):
    attempts = random.randint(1, 4)
    for _ in range(attempts):
        product_id = random.randint(1, NUM_PRODUCTS)
        pair = (product_id, order_id)
        if pair in order_details_set:
            continue  # duplicate product for this order: drop this attempt
        line_price = round(random.uniform(50, 3000), 2)
        line_quantity = random.randint(1, 5)
        order_details.append([product_id, order_id, line_price, line_quantity])
        order_details_set.add(pair)

detail_columns = ["product_id", "order_id", "unit_price", "quantity"]
with open("order_details.csv", "w", newline="") as details_file:
    csv_out = csv.writer(details_file)
    csv_out.writerow(detail_columns)
    csv_out.writerows(order_details)



In [15]:
# ----------------------------
# 7. Shipping
# ----------------------------
# Generates NUM_SHIPPINGS shipment rows and writes them to shippings.csv.
# Each shipment points at a random order and a random address. Roughly half
# of them are given a delivery date 2-10 days after the shipping date.
#
# A shipment is only marked "Delivered" when that delivery date is not in the
# future. Shipping dates can be as recent as today, so without this check a
# row could claim to be delivered on a date that has not happened yet.
today = datetime.today().date()

shippings = []
for i in range(1, NUM_SHIPPINGS + 1):
    order_id = random.randint(1, NUM_ORDERS)
    shipping_date = fake.date_between(start_date="-1y", end_date="today")

    delivery_date = None
    if random.choice([True, False]):
        arrival = shipping_date + timedelta(days=random.randint(2, 10))
        if arrival <= today:
            delivery_date = arrival
            status = "Delivered"
        else:
            # Would arrive after today: still in transit, so no delivery date yet.
            status = "Pending"
    else:
        status = random.choice(["Pending", "Cancelled"])

    shippings.append([
        i,
        shipping_date,
        delivery_date,  # None is written by csv as an empty field
        status,
        order_id,
        random.randint(1, NUM_ADDRESSES),
    ])

with open("shippings.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["shipping_id", "shipping_date", "delivery_date", "status", "order_id", "address_id"])
    writer.writerows(shippings)



In [None]:
# ----------------------------
# 8. Payments
# ----------------------------
# One row per payment id in 1..NUM_PAYMENTS, each attached to a random order
# with a random date in the last year, amount, method and status.
methods = ["Credit Card", "Debit Card", "PayPal", "Cash"]
payment_statuses = ["Completed", "Pending", "Failed"]

payments = []
for payment_no in range(1, NUM_PAYMENTS + 1):
    order_id = random.randint(1, NUM_ORDERS)  # links to orders.csv
    paid_on = fake.date_between(start_date="-1y", end_date="today")
    amount_paid = round(random.uniform(100, 5000), 2)
    method = random.choice(methods)
    payment_state = random.choice(payment_statuses)
    payments.append([payment_no, paid_on, amount_paid, method, payment_state, order_id])

payment_columns = ["payment_id", "payment_date", "amount_paid", "method", "status", "order_id"]
with open("payments.csv", "w", newline="") as payments_file:
    csv.writer(payments_file).writerows([payment_columns, *payments])

