In [2]:
import pandas as pd
from faker import Faker
import random
import os

In [3]:
# Shared Faker instance used by every generate_* function below.
fake = Faker()
# Seed BOTH random sources: Faker keeps its own generator, so seeding only the
# stdlib `random` module leaves names, emails, UUIDs and dates different on
# every run.
# NOTE(review): the date_this_* providers are presumably relative to today's
# date, so date columns may still differ between runs on different days.
Faker.seed(42)
random.seed(42)


In [4]:
# Number of transaction rows to generate; main() passes this value to
# generate_transactions().
num_records = 500


In [5]:
def generate_users(n):
    """Build a DataFrame of ``n`` synthetic user records.

    Each record is produced by the module-level ``fake`` object.

    Args:
        n: Number of user rows to generate.

    Returns:
        pandas.DataFrame with columns ``User_ID``, ``Name``, ``Email`` and
        ``Signup_Date`` (one row per generated user; empty when ``n`` is 0).
    """
    column_names = ["User_ID", "Name", "Email", "Signup_Date"]
    # List elements are evaluated left to right, so every row draws from
    # `fake` in the order: uuid4, name, email, date_this_decade.
    rows = [
        [fake.uuid4(), fake.name(), fake.email(), fake.date_this_decade()]
        for _ in range(n)
    ]
    return pd.DataFrame(rows, columns=column_names)


In [6]:
def generate_products(n):
    """Build a DataFrame of ``n`` synthetic product records.

    IDs, names and categories come from the module-level ``fake`` object;
    prices are drawn with ``random.uniform(5.0, 100.0)`` and rounded to two
    decimal places.

    Args:
        n: Number of product rows to generate.

    Returns:
        pandas.DataFrame with columns ``Product_ID``, ``Product_Name``,
        ``Category`` and ``Price``.
    """

    def _one_product():
        # Draw order per row: uuid4, word (name), word (category), price.
        return [
            fake.uuid4(),
            fake.word(),
            fake.word(),
            round(random.uniform(5.0, 100.0), 2),
        ]

    rows = [_one_product() for _ in range(n)]
    return pd.DataFrame(
        rows,
        columns=["Product_ID", "Product_Name", "Category", "Price"],
    )


In [7]:
def generate_transactions(users, products, n):
    """Build a DataFrame of ``n`` synthetic transactions.

    Every transaction references one randomly chosen user and one randomly
    chosen product, with a quantity drawn from ``random.randint(1, 5)``.
    Transaction IDs and dates come from the module-level ``fake`` object.

    Args:
        users: DataFrame with a ``User_ID`` column to sample from.
        products: DataFrame with a ``Product_ID`` column to sample from.
        n: Number of transaction rows to generate.

    Returns:
        pandas.DataFrame with columns ``Transaction_ID``, ``User_ID``,
        ``Product_ID``, ``Quantity`` and ``Transaction_Date``.

    Raises:
        IndexError: Propagated from ``random.choice`` when ``n`` > 0 and
            either ID column is empty.
    """
    # Materialise the ID pools as plain lists once, outside the loop.
    # random.choice picks an element via seq[i]; on a pandas Series that is a
    # label lookup, which fails when the frame's index is not 0..len-1 (for
    # example after filtering rows). A list is always positional.
    user_ids = users["User_ID"].tolist()
    product_ids = products["Product_ID"].tolist()

    transactions = []
    for _ in range(n):
        # Keep this draw order stable so seeded runs stay reproducible.
        transaction_id = fake.uuid4()
        user_id = random.choice(user_ids)
        product_id = random.choice(product_ids)
        quantity = random.randint(1, 5)
        transaction_date = fake.date_this_year()
        transactions.append([transaction_id, user_id, product_id, quantity, transaction_date])
    return pd.DataFrame(
        transactions,
        columns=["Transaction_ID", "User_ID", "Product_ID", "Quantity", "Transaction_Date"],
    )


In [8]:
def main(output_dir="data/raw", n_users=100, n_products=50, n_transactions=None):
    """Generate the users, products and transactions tables and save them as CSV.

    Called with no arguments this writes 100 users, 50 products and
    ``num_records`` transactions into ``data/raw/``.

    Args:
        output_dir: Directory that receives ``users.csv``, ``products.csv``
            and ``transactions.csv``; created if it does not exist.
        n_users: Number of user rows to generate.
        n_products: Number of product rows to generate.
        n_transactions: Number of transaction rows to generate. ``None``
            means use the module-level ``num_records`` value at call time.
    """
    if n_transactions is None:
        n_transactions = num_records

    users = generate_users(n_users)
    products = generate_products(n_products)
    transactions = generate_transactions(users, products, n_transactions)

    # Create data directory if not exists
    os.makedirs(output_dir, exist_ok=True)

    # Save data to CSV files (index=False keeps the pandas row index out of the files)
    tables = {
        "users.csv": users,
        "products.csv": products,
        "transactions.csv": transactions,
    }
    for filename, frame in tables.items():
        frame.to_csv(os.path.join(output_dir, filename), index=False)

    print(f"Data generated and saved to '{output_dir}/' directory")

In [9]:
# Run the generator only when this code executes as the top-level module
# (script or notebook kernel), not when it is imported from elsewhere.
if __name__ == "__main__":
    main()


Data generated and saved to 'data/raw/' directory
