# In [None]:
import json
import os
import random
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta, timezone

from faker import Faker
from kafka import KafkaProducer


# Shared Faker instance used by every generator in this file.
fake = Faker()

# Kafka configuration: the broker address is read from the KAFKA_BROKER
# environment variable and falls back to 'kafka1:9092' when it is unset.
kafka_broker = os.getenv('KAFKA_BROKER', 'kafka1:9092')

# Each outgoing value is dumped to a JSON string and encoded as UTF-8 bytes.
producer = KafkaProducer(
    bootstrap_servers=[kafka_broker],
    value_serializer=lambda payload: json.dumps(payload).encode('utf-8'),
)

# IDs of the customers / products generated so far. The generator functions
# append to these lists and generate_transaction() picks from them.
customers = []
products = []
# Generate Customer Data
def generate_customer():
    """Create one fake customer record and register its ID.

    The new ``customer_id`` is appended to the module-level ``customers``
    list so that later transactions can refer to it.

    Returns:
        dict: The customer record with the keys ``customer_id``, ``name``,
        ``email``, ``location``, ``age`` (18-70 inclusive), ``gender``,
        ``account_created`` and ``last_login`` (both ISO-formatted strings).
    """
    customer_id = fake.uuid4()
    profile = {
        "customer_id": customer_id,
        "name": fake.name(),
        "email": fake.email(),
        "location": fake.address(),
        "age": random.randint(18, 70),
        "gender": random.choice(["Male", "Female", "Other"]),
        "account_created": fake.past_date().isoformat(),
        "last_login": fake.date_time_this_month().isoformat(),
    }
    # Remember the ID so generate_transaction() can draw from it.
    customers.append(customer_id)
    return profile
# Generate Product Data
def generate_product():
    """Create one fake product record and register its ID.

    The new ``product_id`` is appended to the module-level ``products``
    list so that later transactions can refer to it.

    Returns:
        dict: The product record with the keys ``product_id``, ``name``,
        ``category``, ``price`` (10-500, rounded to 2 decimals),
        ``stock_quantity`` (0-100 inclusive), ``supplier`` and ``rating``
        (1-5, rounded to 1 decimal).
    """
    product_id = fake.uuid4()
    item = {
        "product_id": product_id,
        "name": fake.word().title(),
        "category": random.choice(
            ['Electronics', 'Books', 'Clothing', 'Home & Garden']
        ),
        "price": round(random.uniform(10, 500), 2),
        "stock_quantity": random.randint(0, 100),
        "supplier": fake.company(),
        "rating": round(random.uniform(1, 5), 1),
    }
    # Remember the ID so generate_transaction() can draw from it.
    products.append(product_id)
    return item
# Generate Transaction Data
def generate_transaction():
    """Build one fake purchase transaction for a known customer and product.

    The customer and product IDs are drawn at random from the module-level
    ``customers`` and ``products`` lists, so at least one of each must have
    been generated beforehand (``random.choice`` raises ``IndexError`` on an
    empty list).

    Returns:
        dict: A JSON-serializable record with the keys ``transaction_id``,
        ``customer_id``, ``product_id``, ``quantity`` (1-5 inclusive),
        ``date_time``, ``status``, ``payment_method`` and ``event_time``.
        ``event_time`` is an ISO-8601 UTC timestamp lying 0-60 seconds in
        the past, simulating events that arrive late.
    """
    customer_id = random.choice(customers)
    product_id = random.choice(products)

    # Simulate late arrival: the event happened 0-60 seconds before now.
    delay_seconds = random.randint(0, 60)
    event_time = datetime.now(timezone.utc) - timedelta(seconds=delay_seconds)
    return {
        "transaction_id": fake.uuid4(),
        "customer_id": customer_id,
        "product_id": product_id,
        "quantity": random.randint(1, 5),
        "date_time": fake.date_time_this_year().isoformat(),
        "status": random.choice(["completed", "pending", "canceled"]),
        "payment_method": random.choice(["credit card", "PayPal", "bank transfer"]),
        # Emitted as an ISO-8601 string: a raw datetime object cannot be
        # encoded by json.dumps, which the producer's value_serializer uses.
        "event_time": event_time.isoformat(),
    }
    
def send_data():
    """Generate one customer, one product and one transaction, then publish.

    Only the transaction is sent, to the 'ecommerce_transactions' topic.
    The customer and product records themselves are discarded.
    """
    # Called for their side effect only: each one registers a fresh ID that
    # generate_transaction() may pick.
    generate_customer()
    generate_product()
    producer.send('ecommerce_transactions', value=generate_transaction())
       
# Parallel Data Generation
def _report_failure(future):
    """Print the exception raised by a finished send_data task, if any."""
    if future.cancelled():
        return
    error = future.exception()
    if error is not None:
        print(f"send_data failed: {error!r}")


# Submit send_data tasks forever to a pool of 5 worker threads. The loop has
# no exit condition; it runs until the process or kernel is interrupted.
with ThreadPoolExecutor(max_workers=5) as executor:
    while True:
        # The returned future is not kept, so without this callback any
        # exception raised inside send_data would be lost silently.
        executor.submit(send_data).add_done_callback(_report_failure)
        # Pause 0.01-0.1 s between submissions to throttle the event rate.
        time.sleep(random.uniform(0.01, 0.1))