In [24]:
import os
import json
import time
import numpy as np
import random
import math
import argparse
import io
from avro.io import DatumWriter, BinaryEncoder
from avro.schema import Parse
from faker import Faker
from kafka import KafkaProducer, KafkaConsumer
from dotenv import load_dotenv
from datetime import datetime

In [4]:
# Load the Avro schema used to validate and serialize order records.
schema_path = "../order.avsc"
# A context manager closes the file handle as soon as the schema text is read
# (the bare open(...).read() left it open until garbage collection).
with open(schema_path, "r", encoding="utf-8") as schema_file:
    schema = Parse(schema_file.read())

In [5]:
def serialize_avro(record, schema):
    """Encode ``record`` as Avro binary using ``schema``.

    Args:
        record: The datum to encode (this notebook passes order dicts).
        schema: Schema object handed to ``DatumWriter``.

    Returns:
        bytes: Everything the writer emitted into an in-memory buffer.
    """
    with io.BytesIO() as sink:
        DatumWriter(schema).write(record, BinaryEncoder(sink))
        return sink.getvalue()

In [6]:
def validate_record(record, schema):
    """Report whether ``record`` can be Avro-encoded with ``schema``.

    The check is a trial encode into a throwaway in-memory buffer; the encoded
    bytes are discarded.

    Args:
        record: The datum to check.
        schema: Schema object handed to ``DatumWriter``.

    Returns:
        bool: True when encoding completes; False when anything raises, in
        which case the exception is printed after "Validation failed:".
    """
    try:
        scratch = io.BytesIO()
        DatumWriter(schema).write(record, BinaryEncoder(scratch))
    except Exception as err:
        print("Validation failed:", err)
        return False
    else:
        return True

In [13]:
# Smoke-test the validator with a hand-written order record; the cell's value
# is whatever validate_record() returns for it.
test = dict(
    order_id="1",
    customer_name="Sam",
    product="Phone",
    quantity=1,
    price=10.0,
    order_date="29/05/2025",
    order_time="12:00:00",
)
validate_record(test, schema)

True

In [None]:
def create_messages(producer,
                    topic_name,
                    n=10,
                    delay=1):
    """Generate ``n`` fake orders, Avro-encode them and send them to Kafka.

    Each order is encoded with the notebook-level ``schema`` via
    ``serialize_avro``. A record that fails to encode is reported with
    "Validation failed:" and skipped; the remaining records are still sent.

    Args:
        producer: Object exposing ``send(topic, key=..., value=...)`` and
            ``flush()`` (a ``KafkaProducer`` in this notebook). Values are
            passed as already-encoded bytes.
        topic_name: Topic the orders are sent to.
        n: Number of orders to generate.
        delay: Seconds to pause between consecutive orders. ``order_id`` is the
            current Unix time in whole seconds, so ids are only guaranteed to
            differ while messages are at least one second apart.

    Returns:
        None. ``producer.flush()`` is always called before returning, including
        when an exception interrupts the loop.
    """
    print(f"Producing {n} order messages...")
    faker = Faker()
    # Loop-invariant, so build it once instead of once per order.
    products = ["Laptop", "Phone", "Headphones", "Monitor", "Keyboard"]
    try:
        for i in range(n):
            if i:
                # Pause *between* messages only; sleeping after the last one
                # just delays the final flush.
                time.sleep(delay)
            curr_time = datetime.now()
            order = {
                "order_id": str(math.floor(curr_time.timestamp())),
                "customer_name": faker.name(),
                "product": faker.word(ext_word_list=products),
                "quantity": int(np.random.choice([1, 2, 3, 4], p=[0.4, 0.3, 0.2, 0.1])),
                "price": round(random.uniform(10, 100), 2),
                "order_date": curr_time.strftime("%Y-%m-%d"),
                "order_time": curr_time.strftime("%H:%M:%S")
            }
            # Serialize the order to Avro format. Encoding once doubles as the
            # validation step, so the record is no longer encoded twice.
            try:
                avro_bytes = serialize_avro(order, schema)
            except Exception as e:
                print("Validation failed:", e)
                continue
            print(type(avro_bytes), avro_bytes)
            # Send the serialized order to Kafka
            producer.send(topic_name,
                          key=order["order_id"],
                          value=avro_bytes)
            print(f"Sent: {order}")
    finally:
        # Push out whatever was queued even if the loop was interrupted.
        producer.flush()

In [17]:
# Load variables from a .env file into the process environment; the Kafka
# client cells read BOOTSTRAP_SERVERS, SASL_USERNAME and SASL_PASSWORD from it
# via os.getenv().
load_dotenv()

True

In [22]:
# Producer for the Avro-encoded orders: no value_serializer is configured, so
# values must already be bytes; keys are UTF-8 encoded strings.
producer = KafkaProducer(
    # Connection
    bootstrap_servers=os.getenv("BOOTSTRAP_SERVERS"),
    # Authentication (credentials come from the environment)
    security_protocol="SASL_SSL",
    sasl_mechanism="SCRAM-SHA-256",
    sasl_plain_username=os.getenv("SASL_USERNAME"),
    sasl_plain_password=os.getenv("SASL_PASSWORD"),
    # Serialization
    key_serializer=lambda key: key.encode("utf-8"),
)

In [23]:
# Produce a single Avro-encoded order to the 'orders' topic.
# NOTE(review): the JSON consumer defined later in this notebook reads the same
# 'orders' topic and UTF-8/JSON-decodes every value, so Avro payloads like this
# one look like the source of its UnicodeDecodeError — consider a separate topic.
create_messages(producer, 'orders', 1)

Producing 1 order messages...
<class 'bytes'> b'\x141748535547\x1aMichelle Hunt\x14Headphones\x08\\\x8f=B\x142025-05-29\x1017:19:07'
Sent: {'order_id': '1748535547', 'customer_name': 'Michelle Hunt', 'product': 'Headphones', 'quantity': 4, 'price': 47.39, 'order_date': '2025-05-29', 'order_time': '17:19:07'}


In [29]:
# Settings shared by the consumer/producer cells that follow.
bootstrap_servers = os.getenv("BOOTSTRAP_SERVERS")  # broker address(es) from the environment
group_id = "order-consumer-group"    # consumer group the consumer joins
topic_name = "orders"                # topic the consumer reads
new_topic_name = "orders_clean"      # topic the transformed orders are written to

In [26]:
# Create the Kafka consumer for the raw orders topic.
# NOTE(review): value_deserializer raises on any payload that is not UTF-8 JSON
# (e.g. the Avro bytes create_messages() writes to 'orders'); the exception
# surfaces while iterating the consumer and stops the loop.
consumer = KafkaConsumer(
    topic_name,
    # Connection and group membership
    bootstrap_servers=bootstrap_servers,
    group_id=group_id,
    auto_offset_reset="earliest",
    enable_auto_commit=True,
    # Authentication (credentials come from the environment)
    security_protocol="SASL_SSL",
    sasl_mechanism="SCRAM-SHA-256",
    sasl_plain_username=os.getenv("SASL_USERNAME"),
    sasl_plain_password=os.getenv("SASL_PASSWORD"),
    # Deserialization: an empty/missing key becomes None, values are JSON text
    key_deserializer=lambda raw_key: raw_key.decode("utf-8") if raw_key else None,
    value_deserializer=lambda raw_value: json.loads(raw_value.decode("utf-8")),
)
print(
    f"Connecting to Kafka server at {bootstrap_servers}...",
    f"Consumer group ID: {group_id}",
    f"Subscribed to topic: {topic_name}. Waiting for messages...\n",
    sep="\n",
)

Connecting to Kafka server at d0m54relvm2hk8gkp54g.any.eu-central-1.mpx.prd.cloud.redpanda.com:9092...
Consumer group ID: order-consumer-group
Subscribed to topic: orders. Waiting for messages...



In [27]:
# Producer for the transformed orders: values are JSON-encoded by the
# serializer, keys are UTF-8 encoded strings.
# NOTE(review): this rebinds `producer`, replacing the earlier producer that had
# no value_serializer; re-running the Avro cell after this one would hand raw
# bytes to json.dumps — consider a distinct name.
producer = KafkaProducer(
    # Connection
    bootstrap_servers=bootstrap_servers,
    # Authentication (credentials come from the environment)
    security_protocol="SASL_SSL",
    sasl_mechanism="SCRAM-SHA-256",
    sasl_plain_username=os.getenv("SASL_USERNAME"),
    sasl_plain_password=os.getenv("SASL_PASSWORD"),
    # Serialization
    key_serializer=lambda key: key.encode("utf-8"),
    value_serializer=lambda value: json.dumps(value).encode("utf-8"),
)

In [34]:
# Consume raw orders, reduce each one to (order_id, product, total_price) and
# republish it to `new_topic_name`.
# NOTE(review): a payload that is not UTF-8 JSON makes the consumer's
# value_deserializer raise inside the `for` statement itself, so it cannot be
# skipped from within this loop; that has to be handled in the deserializer.
try:
    for message in consumer:
        # Print message details
        message_key = message.key
        message_value = message.value
        print(f"Key: {message_key}")
        print(f"Order Data: {json.dumps(message_value, indent=2)}\n---\n")

        try:
            # Values on the source topic may have been JSON-encoded twice (they
            # then arrive here as a str) or once (they arrive as a dict).
            order = json.loads(message_value) if isinstance(message_value, str) else message_value
            new_order = {
                "order_id": order["order_id"],
                "product": order["product"],
                # Round away float noise such as 55.260000000000005.
                "total_price": round(order["quantity"] * order["price"], 2),
            }
        except (KeyError, TypeError, ValueError) as exc:
            # One malformed record should not stop the whole stream.
            print(f"Skipping malformed order (key={message_key}): {exc!r}")
            continue

        # Send transformed message to new topic. Pass the dict itself: the
        # producer's value_serializer already applies json.dumps, so dumping
        # here as well would JSON-encode the payload twice.
        producer.send(new_topic_name,
                      key=str(new_order["order_id"]),
                      value=new_order)
        # Single quotes inside the f-string keep this valid before Python 3.12.
        print(f"Sent Order ID: {new_order['order_id']}")
finally:
    # The loop only ends via an exception or interrupt; make sure queued
    # messages are still delivered.
    producer.flush()

Key: 1748531561
Order Data: "{\"order_id\": \"1748531561\", \"customer_name\": \"Mr. Mason Reid MD\", \"product\": \"Keyboard\", \"quantity\": 1, \"price\": 18.42, \"order_date\": \"2025-05-29\", \"order_time\": \"16:12:41\"}"
---

Sent Order ID: 1748531561
Key: 1748531562
Order Data: "{\"order_id\": \"1748531562\", \"customer_name\": \"Sara Bishop\", \"product\": \"Monitor\", \"quantity\": 2, \"price\": 20.93, \"order_date\": \"2025-05-29\", \"order_time\": \"16:12:42\"}"
---

Sent Order ID: 1748531562
Key: 1748531563
Order Data: "{\"order_id\": \"1748531563\", \"customer_name\": \"Samantha Marsh\", \"product\": \"Monitor\", \"quantity\": 1, \"price\": 82.14, \"order_date\": \"2025-05-29\", \"order_time\": \"16:12:43\"}"
---

Sent Order ID: 1748531563
Key: 1748531564
Order Data: "{\"order_id\": \"1748531564\", \"customer_name\": \"Jacqueline Richardson\", \"product\": \"Phone\", \"quantity\": 1, \"price\": 63.96, \"order_date\": \"2025-05-29\", \"order_time\": \"16:12:44\"}"
---

Sent

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8f in position 38: invalid start byte

In [33]:
# Scratch inspection: decode the value left in `message_value` by the consumer
# loop cell (the name only exists after that loop has processed a message).
order = json.loads(message_value)
order

{'order_id': '1748531560',
 'customer_name': 'Justin Strickland',
 'product': 'Laptop',
 'quantity': 2,
 'price': 34.39,
 'order_date': '2025-05-29',
 'order_time': '16:12:40'}