In [1]:
!pip install kafka-python

Collecting kafka-python
  Downloading kafka_python-2.3.0-py2.py3-none-any.whl.metadata (10.0 kB)
Downloading kafka_python-2.3.0-py2.py3-none-any.whl (326 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m326.3/326.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: kafka-python
Successfully installed kafka-python-2.3.0


In [None]:
# Ensure kafka-python is installed: pip install kafka-python
import time
import json
import random
import uuid
from datetime import datetime
from kafka import KafkaProducer

# Configuration
# Broker addresses are the service names from docker-compose.yml, because this
# code is expected to run inside the docker network.
KAFKA_BROKERS = [
    'kafka1:9092',
    'kafka2:9092',
]

# Logical stream name -> Kafka topic that receives its records.
TOPICS = dict(
    users='new_users',
    transactions='new_transactions',
    products='new_products',
    sessions='new_sessions',
)

def get_producer():
    """Build a KafkaProducer for KAFKA_BROKERS whose values are JSON-encoded.

    Returns:
        The producer on success, or None if construction raised; in that case
        the error is printed instead of being propagated.
    """
    def _to_json_bytes(payload):
        # Message values are serialized as UTF-8 encoded JSON text.
        return json.dumps(payload).encode('utf-8')

    try:
        client = KafkaProducer(
            bootstrap_servers=KAFKA_BROKERS,
            value_serializer=_to_json_bytes,
        )
        print(f"Connected to Kafka at {KAFKA_BROKERS}")
        return client
    except Exception as err:
        print(f"Error connecting to Kafka: {err}")
        return None

# --- Data Generators ---

def generate_user():
    """Build one synthetic user record.

    Returns:
        dict with a fresh UUID4 "user_id", a randomized "email", an "age" in
        18..90, a "country" drawn from a fixed list, and "registeration_date"
        set to the current datetime.now() value in ISO format.
    """
    country_pool = [
        "USA", "UK", "Canada", "Germany", "France",
        "Australia", "India", "China", "Brazil", "Japan",
    ]

    # Draw the values in a fixed order so a seeded `random` yields a stable record.
    user_id = str(uuid.uuid4())
    mailbox_number = random.randint(1, 1000000)
    age = random.randint(18, 90)
    country = random.choice(country_pool)

    return {
        "user_id": user_id,
        "email": f"user_{mailbox_number}@example.com",
        "age": age,
        "country": country,
        "registeration_date": datetime.now().isoformat(),
    }

def generate_product():
    """Build one synthetic product record.

    Returns:
        dict with a fresh UUID4 "product_id", a randomized "name", a
        "category" drawn from a fixed list, a "price" in 10.00..2000.00
        (2 decimals), an "inventory" count in 0..1000 and a "ratings" value
        in 1.0..5.0 (1 decimal).
    """
    category_pool = ["Electronics", "Clothing", "Home", "Books", "Sports", "Beauty", "Toys"]

    # Draw the values in a fixed order so a seeded `random` yields a stable record.
    product_id = str(uuid.uuid4())
    name_number = random.randint(1, 10000)
    category = random.choice(category_pool)
    price = round(random.uniform(10.0, 2000.0), 2)
    stock = random.randint(0, 1000)
    rating = round(random.uniform(1.0, 5.0), 1)

    return {
        "product_id": product_id,
        "name": f"Product_{name_number}",
        "category": category,
        "price": price,
        "inventory": stock,
        "ratings": rating,
    }

def generate_transaction():
    """Build one synthetic transaction with 1 to 5 purchased line items.

    Returns:
        dict with fresh UUID4 "transaction_id" and "user_id", a "timestamp"
        from datetime.now() in ISO format, the "products" list (each entry has
        "product_id", "quantity" in 1..3 and "price" in 10.00..1000.00), the
        "total_amount" (sum of price * quantity, rounded to 2 decimals) and a
        randomly chosen "payment_method".
    """
    def _random_line_item():
        # Price is drawn before quantity; keep this order for seeded runs.
        unit_price = round(random.uniform(10.0, 1000.0), 2)
        units = random.randint(1, 3)
        return {
            "product_id": str(uuid.uuid4()),
            "quantity": units,
            "price": unit_price,
        }

    line_items = [_random_line_item() for _ in range(random.randint(1, 5))]

    # Plain left-to-right accumulation, so float rounding matches item order.
    order_total = 0.0
    for item in line_items:
        order_total += item["price"] * item["quantity"]

    payment_options = ["Credit Card", "PayPal", "Debit Card", "Apple Pay", "Google Pay"]

    return {
        "transaction_id": str(uuid.uuid4()),
        "user_id": str(uuid.uuid4()),
        "timestamp": datetime.now().isoformat(),
        "products": line_items,
        "total_amount": round(order_total, 2),
        "payment_method": random.choice(payment_options),
    }

def generate_session():
    """Generate a random session record with time-ordered cart events.

    The session holds 1 to 10 events spaced one second apart. The session
    start is back-dated so that the last event falls at the moment of
    generation; the session-level "timestamp" equals the first event's
    timestamp, so a session never starts after its own events.

    Returns:
        dict with fresh UUID4 "user_id" and "session_id", the session start
        "timestamp" (ISO format) and the "events" list, where each event has
        an "eventType" and an ISO-format "timestamp".
    """
    # Local import: only this generator needs timedelta.
    from datetime import timedelta

    event_types = ["ADD_TO_CART", "REMOVE_FROM_CART", "CLEAR_CART"]
    num_events = random.randint(1, 10)

    # Back-date the start so the final event lands on "now", not in the future.
    session_start = datetime.now() - timedelta(seconds=num_events - 1)

    events = [
        {
            "eventType": random.choice(event_types),
            # Each event is one second after the previous one.
            "timestamp": (session_start + timedelta(seconds=offset)).isoformat(),
        }
        for offset in range(num_events)
    ]

    return {
        "user_id": str(uuid.uuid4()),
        "session_id": str(uuid.uuid4()),
        "timestamp": session_start.isoformat(),
        "events": events
    }

def main():
    """Stream synthetic records to Kafka until interrupted.

    Each cycle sends one user, product, transaction and session record to the
    topics named in TOPICS, flushes the producer, verifies delivery of every
    record, then sleeps for a random 1-3 seconds. The loop ends on
    KeyboardInterrupt (Ctrl+C or a kernel interrupt) or on any other
    exception; the producer is closed in both cases.

    Returns:
        None. Returns early, without streaming, if no producer could be created.
    """
    print("Initializing Kafka Producer...")
    producer = get_producer()

    if not producer:
        print("Failed to initialize producer. Please check your Kafka connection settings.")
        return

    # (TOPICS key, record generator) pairs, in the order they are sent each cycle.
    streams = (
        ('users', generate_user),
        ('products', generate_product),
        ('transactions', generate_transaction),
        ('sessions', generate_session),
    )
    # Upper bound, in seconds, when reading a send result. After flush() the
    # results should already be available, so this is only a safety net.
    delivery_timeout_s = 10

    print("Starting data stream. Press Ctrl+C to stop.")

    try:
        while True:
            # send() is asynchronous: keep each returned future so delivery
            # failures can be detected instead of silently dropped.
            pending = []
            for key, make_record in streams:
                topic = TOPICS[key]
                pending.append((topic, producer.send(topic, make_record())))

            # Block until all buffered records have been sent (or have failed).
            producer.flush()

            delivered = 0
            for topic, future in pending:
                try:
                    future.get(timeout=delivery_timeout_s)
                    delivered += 1
                except Exception as send_error:
                    print(f"Failed to deliver record to '{topic}': {send_error}")

            stamp = datetime.now().strftime('%H:%M:%S')
            if delivered == len(pending):
                print(f"Produced 4 records (User, Product, Transaction, Session) at {stamp}")
            else:
                print(f"Produced {delivered} of {len(pending)} records at {stamp}")

            # Sleep for a random interval to simulate continuous streaming
            time.sleep(random.uniform(1.0, 3.0))

    except KeyboardInterrupt:
        print("\nStopping stream...")
    except Exception as e:
        print(f"\nAn error occurred: {e}")
    finally:
        # A producer always exists here: the no-producer case returned above.
        producer.close()
        print("Producer closed.")

# Start the stream when this code runs as a script or as a notebook cell
# (where __name__ is "__main__"); nothing runs when it is imported as a module.
if __name__ == "__main__":
    main()


Initializing Kafka Producer...
Connected to Kafka at ['kafka1:9092', 'kafka2:9092']
Starting data stream. Press Ctrl+C to stop.
Produced 4 records (User, Product, Transaction, Session) at 10:28:36
Produced 4 records (User, Product, Transaction, Session) at 10:28:38
Produced 4 records (User, Product, Transaction, Session) at 10:28:39
Produced 4 records (User, Product, Transaction, Session) at 10:28:40
Produced 4 records (User, Product, Transaction, Session) at 10:28:43
Produced 4 records (User, Product, Transaction, Session) at 10:28:45
Produced 4 records (User, Product, Transaction, Session) at 10:28:46
Produced 4 records (User, Product, Transaction, Session) at 10:28:49
Produced 4 records (User, Product, Transaction, Session) at 10:28:52
Produced 4 records (User, Product, Transaction, Session) at 10:28:53
Produced 4 records (User, Product, Transaction, Session) at 10:28:55
Produced 4 records (User, Product, Transaction, Session) at 10:28:56
Produced 4 records (User, Product, Transacti