In [2]:
pip install faker

Collecting faker
  Downloading Faker-18.13.0-py3-none-any.whl (1.7 MB)
[K     |████████████████████████████████| 1.7 MB 325 kB/s eta 0:00:01
Installing collected packages: faker
Successfully installed faker-18.13.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
from faker import Faker
import random
from datetime import datetime, timedelta

# Seed every random source used in this notebook so a fresh run regenerates the
# same data. Faker draws from its own generator, which random.seed() and
# np.random.seed() do not affect, so it has to be seeded explicitly.
fake = Faker()
Faker.seed(42)
np.random.seed(42)  # drives DataFrame.sample() when no random_state is passed
random.seed(42)     # drives random.choice / randint / uniform

# --- Customers ---
# Value pools the categorical customer attributes are drawn from.
regions = ['Tehran', 'Isfahan', 'Shiraz', 'Mashhad', 'Tabriz', 'Karaj']
age_groups = ['18-25', '26-35', '36-45', '46-60']
genders = ['Male', 'Female']

# Build 300 customer records (ids C0001..C0300). Within each record the draws
# happen in key order: name, gender, age group, region.
customers = [
    {
        'customer_id': f'C{seq:04d}',
        'customer_name': fake.name(),
        'gender': random.choice(genders),
        'age_group': random.choice(age_groups),
        'region': random.choice(regions),
    }
    for seq in range(1, 301)
]

customers_df = pd.DataFrame(customers)
customers_df.to_csv('customers.csv', index=False)

# --- Products ---
# Category -> sub-categories; each sub-category gets 10 generated products.
categories = {
    'Electronics': ['Laptop', 'Smartphone', 'Headphones'],
    'Home': ['Vacuum Cleaner', 'Air Purifier', 'Cookware Set'],
    'Beauty': ['Perfume', 'Face Cream', 'Hair Dryer'],
    'Fashion': ['Sneakers', 'Jacket', 'Jeans'],
    'Food': ['Coffee', 'Chocolate Box', 'Olive Oil']
}

# Flatten to one (category, sub_category) entry per product, in dict order.
product_slots = [
    (cat, sub)
    for cat, subcats in categories.items()
    for sub in subcats
    for _ in range(10)
]

products = []
for pid, (cat, sub) in enumerate(product_slots, start=1):
    # Cost is an integer in [200, 2000]; price is cost marked up by 1.2x-1.8x.
    cost = random.randint(200, 2000)
    price = cost * random.uniform(1.2, 1.8)
    record = {
        'product_id': f'P{pid:04d}',
        'category': cat,
        'sub_category': sub,
        'product_name': f'{fake.word().capitalize()} {sub}',
        'cost_price': round(cost, 2),
        'unit_price': round(price, 2),
    }
    products.append(record)

products_df = pd.DataFrame(products)
products_df.to_csv('products.csv', index=False)

# --- Orders ---
# Materialise the id pool and the id -> region mapping once. random.choice on a
# plain list does not depend on the DataFrame's index labels, and the dict
# lookup replaces a full boolean-mask scan of customers_df for every order.
customer_ids = customers_df['customer_id'].tolist()
region_by_customer = dict(zip(customer_ids, customers_df['region'].tolist()))

orders = []
for i in range(1, 801):
    cid = random.choice(customer_ids)
    # Order dates fall in the six months before the day the notebook is run,
    # so they shift with the run date.
    order_date = fake.date_between(start_date='-6M', end_date='today')
    # Shipping happens 1-7 days after the order is placed.
    ship_date = order_date + timedelta(days=random.randint(1, 7))
    orders.append({
        'order_id': f'O{i:04d}',
        'customer_id': cid,
        'order_date': order_date,
        'ship_date': ship_date,
        'ship_mode': random.choice(['Standard', 'Express']),
        # The order's region is the region of the customer who placed it.
        'region': region_by_customer[cid]
    })
orders_df = pd.DataFrame(orders)
orders_df.to_csv('orders.csv', index=False)

# --- Order Items ---
# Each order gets 1-3 line items; every line picks one random product, a
# quantity of 1-4 and a discount from a fixed set of rates.
order_items = []
oid_counter = 1  # running order_item_id across all orders
for order in orders_df['order_id']:
    for _ in range(random.randint(1, 3)):
        # sample() without random_state draws from NumPy's global RNG.
        product = products_df.sample(1).iloc[0]
        qty = random.randint(1, 4)
        discount = random.choice([0, 0.05, 0.1, 0.15])
        # The discount reduces the selling price, not the margin: profit is the
        # discounted unit price minus the unit cost, times the quantity.
        net_unit_price = product.unit_price * (1 - discount)
        profit = (net_unit_price - product.cost_price) * qty
        order_items.append({
            'order_item_id': oid_counter,
            'order_id': order,
            'product_id': product.product_id,
            'quantity': qty,
            'discount': discount,
            'profit': round(profit, 2)
        })
        oid_counter += 1
order_items_df = pd.DataFrame(order_items)
order_items_df.to_csv('order_items.csv', index=False)

# --- Payments ---
methods = ['Credit Card', 'Online Payment', 'Cash on Delivery']

# A payment covers what the customer is charged for the order: the sum over its
# line items of unit_price * quantity * (1 - discount). It is computed once for
# all orders instead of filtering order_items_df inside the loop.
priced_items = order_items_df.merge(
    products_df[['product_id', 'unit_price']],
    on='product_id',
    how='left',
    validate='many_to_one',  # each product_id must match a single product row
)
line_totals = (
    priced_items['unit_price'] * priced_items['quantity'] * (1 - priced_items['discount'])
)
order_totals = line_totals.groupby(priced_items['order_id']).sum()

payments = []
for i, order in enumerate(orders_df['order_id'], start=1):
    # An order with no line items would be charged 0.0.
    total_amount = float(order_totals.get(order, 0.0))
    payments.append({
        'payment_id': i,
        'order_id': order,
        'payment_method': random.choice(methods),
        'payment_amount': round(total_amount, 2),
        'payment_status': random.choice(['Paid', 'Pending'])
    })
payments_df = pd.DataFrame(payments)
payments_df.to_csv('payments.csv', index=False)

print("✅ All CSV files created successfully!")


✅ All CSV files created successfully!
