In [35]:
# Prints a fixed greeting; presumably a smoke test that the kernel runs — TODO confirm.
print("hello world!")

hello world!


**What is `faker` in `import faker`**:  
   `faker` is a Python library that generates fake data, such as names, addresses, and emails. It is useful for testing and simulating realistic-looking random data in various domains.

**What is `faker`**:  
   `faker` is the import name of the `Faker` package (installed with `pip install Faker`). The module exposes the `Faker` class, which generates random fake data such as names, addresses, and emails. It helps simulate realistic data for development and testing purposes.

**What are `datetime`, `timedelta` in `from datetime import datetime, timedelta`**:  
   `datetime` represents date and time, while `timedelta` represents a duration or difference between two dates. They are used to manipulate and generate dates for order history.

In [36]:
import random
import faker
from datetime import datetime, timedelta


**What is `random.random()`**:  
   `random.random()` returns a random floating-point number in the half-open range [0.0, 1.0), i.e. 0.0 is possible but 1.0 is not. It’s used to create a probability condition (like deciding if a product should have variants).

In [37]:
# Constants

# Base product names grouped by category; dict order is the category order.
PRODUCTS = {
    "Clothing": ["T-shirt", "Jeans", "Jacket"],
    "Groceries": ["Rice", "Pasta", "Olive Oil"],
    "Electronics": ["Laptop", "Smartphone", "Headphones", "ipad"],
}

# Category names, taken from the catalogue keys so the two lists cannot drift apart.
PRODUCT_CATEGORIES = list(PRODUCTS)

# Colours that may be prefixed to a product name when it is split into variants.
COLORS = ["Red", "Green", "Blue", "Black", "White"]

# Probability (0.5 = 50%) that an eligible product is expanded into variants.
VARIANT_CHANCE = 0.5

# Generate Product Data
def generate_products(catalog=None, colors=None, variant_chance=None,
                      no_variant_categories=("Groceries",), max_variants=3):
    """Build the flat product list, expanding some products into colour variants.

    Each base product either appears once under its own name or is replaced
    by 1..max_variants entries named "<color> <product>". Products in
    ``no_variant_categories`` are never expanded.

    Args:
        catalog: Mapping of category name -> list of base product names.
            Defaults to the module-level ``PRODUCTS``.
        colors: Sequence of colour names to draw variants from.
            Defaults to the module-level ``COLORS``.
        variant_chance: Probability in [0, 1] that an eligible product is
            expanded. Defaults to the module-level ``VARIANT_CHANCE``.
        no_variant_categories: Categories whose products never get variants.
        max_variants: Upper bound on the number of variants per product.

    Returns:
        A list of dicts with keys ``"id"`` (sequential, starting at 1),
        ``"category"`` and ``"name"``.
    """
    catalog = PRODUCTS if catalog is None else catalog
    colors = COLORS if colors is None else colors
    if variant_chance is None:
        variant_chance = VARIANT_CHANCE

    # random.sample() raises ValueError when asked for more items than the
    # population holds, so never request more variants than there are colours.
    variant_cap = min(max_variants, len(colors))

    products = []

    def add_product(category, name):
        # IDs are sequential and 1-based: the next ID is the current count + 1.
        products.append({"id": len(products) + 1, "category": category, "name": name})

    for category, product_names in catalog.items():
        # With no usable colours (or max_variants < 1) nothing can be expanded.
        may_vary = variant_cap >= 1 and category not in no_variant_categories
        for product in product_names:
            if may_vary and random.random() < variant_chance:
                # Replace the base product with one entry per sampled colour.
                for color in random.sample(colors, random.randint(1, variant_cap)):
                    add_product(category, f"{color} {product}")
            else:
                # No variants: list the product under its own name.
                add_product(category, product)

    return products


**What is `faker.Faker()`**:  
   `faker.Faker()` initializes a `Faker` instance that provides access to methods for generating fake data. It acts as a generator for creating random names, addresses, emails, etc.


**What are `faker.Faker().name()`, `faker.Faker().email()`, `faker.Faker().address()`, `faker.Faker().phone_number()`**:  
   These methods from `Faker` generate a fake person’s name, email, address, and phone number. They are useful for simulating customer data in an e-commerce scenario.

In [38]:
# Initialize Faker
# One shared, module-level Faker instance: generate_customers() and
# generate_orders() both read `fake` as a global, so this cell must run first.
fake = faker.Faker()

# Generate Customer Data
def generate_customers(num_customers):
    """Create ``num_customers`` fake customer records.

    Every record is a dict with the keys ``"id"``, ``"name"``, ``"email"``,
    ``"address"`` and ``"phone_number"``, each filled from the module-level
    ``fake`` Faker instance.

    Args:
        num_customers: How many records to build; zero or a negative number
            gives an empty list.

    Returns:
        A list of customer dicts.
    """
    def build_customer():
        # The dict literal is evaluated top to bottom, so the Faker calls
        # happen in the same order for every record.
        return {
            "id": fake.uuid4(),
            "name": fake.name(),
            "email": fake.email(),
            "address": fake.address(),
            "phone_number": fake.phone_number(),
        }

    return [build_customer() for _ in range(num_customers)]



**What are `random.choice` and `random.randint` in `generate_orders()`**:  
   `random.choice()` selects a random item from a list (e.g., a random product or customer). `random.randint()` generates a random integer within a specified range (e.g., the number of order items).

**What is `datetime.now() - timedelta(days=random.randint(1, 730))` and `random.randint(1, 5)`**:  
   `datetime.now() - timedelta(days=random.randint(1, 730))` generates a random order date within the last two years. `random.randint(1, 5)` sets a random order quantity between 1 and 5.

  - Since price is dynamic and can change over time, I've put the price column in the orders table.

- `uuid4()` in `fake.uuid4()` generates a random unique identifier (UUID) based on random numbers, which is commonly used for creating unique IDs. The datatype is a string representing the UUID in its standard 36-character format (including hyphens).

- The line `order_date.strftime("%Y-%m-%d")` formats the `order_date` into a string that follows the `YYYY-MM-DD` format (e.g., `2023-09-30`).

In [39]:
# Generate Order Data
def generate_orders(products, customers, num_orders=100):
    """Generate random orders linking existing customers to existing products.

    Args:
        products: Sequence of product dicts; each must have an ``"id"`` key.
        customers: Sequence of customer dicts; each must have an ``"id"`` key.
        num_orders: Number of orders to generate.

    Returns:
        A list of dicts with the keys ``"order_id"``, ``"customer_id"``,
        ``"product_id"``, ``"order_date"`` (``YYYY-MM-DD`` string),
        ``"quantity"`` (1-5), ``"product_price"`` and ``"total_price"``
        (both rounded to 2 decimals).

    Raises:
        IndexError: If ``products`` or ``customers`` is empty while
            ``num_orders`` is positive (raised by ``random.choice``).
    """
    # Take "now" once so every order is dated relative to the same instant.
    now = datetime.now()

    orders = []
    for _ in range(num_orders):
        customer = random.choice(customers)
        product = random.choice(products)
        # Random date 1-730 days back, i.e. within the last 2 years.
        order_date = now - timedelta(days=random.randint(1, 730))
        quantity = random.randint(1, 5)
        price = round(random.uniform(10, 500), 2)  # Random price between $10 and $500
        # Round again after multiplying: a float product can carry binary
        # artefacts (e.g. 624.9000000000001) that would leak into exports.
        total_price = round(price * quantity, 2)

        orders.append({
            "order_id": fake.uuid4(),
            "customer_id": customer["id"],
            "product_id": product["id"],
            # strftime("%Y-%m-%d") renders the date as YYYY-MM-DD (e.g., 2023-09-30).
            "order_date": order_date.strftime("%Y-%m-%d"),
            "quantity": quantity,
            # Price can change over time, so it is stored per order rather than per product.
            "product_price": price,
            "total_price": total_price
        })

    return orders


In [40]:
# Main Function to Generate All Data
def generate_ecommerce_data(num_customers=10, num_orders=200):
    """Generate the complete synthetic dataset: products, customers and orders.

    Args:
        num_customers: Exact number of customer records to create (default 10).
        num_orders: Exact number of orders to create (default 200).

    Returns:
        A ``(products, customers, orders)`` tuple of lists of dicts, as
        produced by ``generate_products``, ``generate_customers`` and
        ``generate_orders`` respectively.
    """
    # Products and customers come first because orders reference their IDs.
    products = generate_products()
    customers = generate_customers(num_customers)

    # Order dates are spread by generate_orders across its own look-back window.
    orders = generate_orders(products, customers, num_orders=num_orders)

    return products, customers, orders

# Build all three datasets in a single call.
products, customers, orders = generate_ecommerce_data()

# Echo each dataset under its own heading (for demonstration purposes).
for heading, records in (
    ("\nGenerated Products:\n", products),
    ("\nGenerated Customers:\n", customers),
    ("\nGenerated Orders:\n", orders),
):
    print(heading, records)

# You can export the data to a database or a file (e.g., CSV, JSON) as per your requirements.



Generated Products:
 [{'id': 1, 'category': 'Clothing', 'name': 'Blue T-shirt'}, {'id': 2, 'category': 'Clothing', 'name': 'Green T-shirt'}, {'id': 3, 'category': 'Clothing', 'name': 'Black T-shirt'}, {'id': 4, 'category': 'Clothing', 'name': 'Jeans'}, {'id': 5, 'category': 'Clothing', 'name': 'Jacket'}, {'id': 6, 'category': 'Groceries', 'name': 'Rice'}, {'id': 7, 'category': 'Groceries', 'name': 'Pasta'}, {'id': 8, 'category': 'Groceries', 'name': 'Olive Oil'}, {'id': 9, 'category': 'Electronics', 'name': 'Laptop'}, {'id': 10, 'category': 'Electronics', 'name': 'Smartphone'}, {'id': 11, 'category': 'Electronics', 'name': 'Black Headphones'}, {'id': 12, 'category': 'Electronics', 'name': 'Green ipad'}, {'id': 13, 'category': 'Electronics', 'name': 'White ipad'}]

Generated Customers:
 [{'id': 'c5a40999-42de-4a85-83c5-4cf45700065d', 'name': 'Eileen Paul', 'email': 'dustinwilliams@example.org', 'address': 'PSC 9272, Box 7184\nAPO AE 85969', 'phone_number': '620-762-0385x553'}, {'id': '

- Convert these lists to pandas DataFrames and save them as CSV files.

In [41]:
import pandas as pd

# One DataFrame per dataset: each list of dicts becomes a table whose
# columns are the dict keys.
df_products, df_customers, df_orders = map(
    pd.DataFrame, (products, customers, orders)
)

In [42]:
# Preview the first rows of the products table (head() shows 5 rows by default).
df_products.head()

Unnamed: 0,id,category,name
0,1,Clothing,Blue T-shirt
1,2,Clothing,Green T-shirt
2,3,Clothing,Black T-shirt
3,4,Clothing,Jeans
4,5,Clothing,Jacket


In [43]:
# Preview the first rows of the customers table (head() shows 5 rows by default).
df_customers.head()

Unnamed: 0,id,name,email,address,phone_number
0,c5a40999-42de-4a85-83c5-4cf45700065d,Eileen Paul,dustinwilliams@example.org,"PSC 9272, Box 7184\nAPO AE 85969",620-762-0385x553
1,347063a8-c7be-4af4-b929-84cd542ca7be,Daniel Campbell,frank39@example.org,"0780 Davis Circle Suite 075\nMargarettown, VA ...",001-365-951-5987
2,eb26de72-4ace-4fbb-8741-4c23e9c36659,Lauren Martin,nwilson@example.org,Unit 8859 Box 3957\nDPO AE 07282,5799441248
3,a04dfab3-750a-4917-8c6f-356c8e305761,Carol Strickland DVM,johnjones@example.net,"4639 Hill Crossroad\nEast Timothy, VI 52441",302.347.1011x5493
4,a9a7f89c-6fbd-446d-a1bc-d036adf3357b,Kayla Chambers,staceytucker@example.org,"21199 Hardin Cliff\nNew Michael, SD 74754",978-841-1664


In [44]:
# Preview the first rows of the orders table (head() shows 5 rows by default).
df_orders.head()

Unnamed: 0,order_id,customer_id,product_id,order_date,quantity,product_price,total_price
0,ecf6d453-4ede-4cd6-a0db-7e8a76d6f611,5c98172f-6e44-433d-bd70-20d711a27402,4,2023-06-19,5,420.65,2103.25
1,e84dbcd0-c793-4f4d-a4de-95f30086d48e,c5a40999-42de-4a85-83c5-4cf45700065d,3,2023-07-13,5,124.98,624.9
2,e5e5c0c0-586d-4a6c-883a-43e40a8fbdaf,eb26de72-4ace-4fbb-8741-4c23e9c36659,3,2023-08-15,5,180.57,902.85
3,043d6cfd-e4d8-4371-bb52-348571cde942,10b36653-5c2e-4687-8484-a63b38c30de3,5,2023-09-22,2,376.98,753.96
4,ecbdf25a-423a-4198-9ee9-a5b9d07b1ede,eb26de72-4ace-4fbb-8741-4c23e9c36659,4,2022-10-15,2,196.55,393.1


In [45]:
# Write each table to its own CSV file in the working directory;
# index=False leaves the DataFrame's row index out of the file.
for frame, filename in (
    (df_products, "products.csv"),
    (df_customers, "customers.csv"),
    (df_orders, "orders.csv"),
):
    frame.to_csv(filename, index=False)