In [None]:
# jupyter_vim
# docker-compose-cdc.yaml automatically spins up containers (see the generator service) that publish messages to the Postgres database. Sequin streams the CDC events to the Kafka topic 'cdc'. See the Sequin service at localhost:7376.

In [1]:
import os

import pandas as pd
import psycopg2

# Connection settings come from the standard PG* environment variables when
# they are set; the fallbacks are the values this notebook previously
# hard-coded, so behaviour is unchanged when none of them are defined.
conn = psycopg2.connect(
    host=os.environ.get("PGHOST", "postgres"),
    port=int(os.environ.get("PGPORT", "5432")),
    user=os.environ.get("PGUSER", "admin"),
    password=os.environ.get("PGPASSWORD", "admin"),
    dbname=os.environ.get("PGDATABASE", "cdc"),
)

In [None]:
# Sanity-check the source table: print the total row count, then pull a
# small sample of rows into a DataFrame for inspection.
with conn.cursor() as cur:
    # Plain string literals: the queries have no interpolated values, so the
    # f-prefix was unnecessary.
    cur.execute("SELECT count(*) FROM messages;")
    print(cur.fetchall())
    cur.execute("SELECT * FROM messages LIMIT 20;")
    values = cur.fetchall()

# The labels below are applied positionally to the SELECT * result.
# NOTE(review): assumes `messages` has exactly these six columns in this
# order — confirm against the table schema.
df = pd.DataFrame(
    values,
    columns=[
        "event_id",
        "origin_id",
        "message",
        "created_at",
        "inserted_at",
        "updated_at",
    ],
)
df.tail(5)

In [3]:
from kafka import KafkaConsumer, TopicPartition
from kafka.admin import KafkaAdminClient, ConfigResource, ConfigResourceType


# Kafka settings shared by the cells below.
topic = "cdc"
bootstrap_servers = "kafka-broker:9092"

# Admin client for the broker, registered under the client id "demo".
admin_client = KafkaAdminClient(client_id="demo", bootstrap_servers=bootstrap_servers)

In [None]:
import json

# Consumer for `topic`, reading from the earliest available offset and never
# committing offsets (enable_auto_commit=False).
consumer_topic = KafkaConsumer(
    topic,
    # Reuse the broker address defined with the admin client instead of
    # repeating the literal, so the two clients cannot drift apart.
    bootstrap_servers=bootstrap_servers,
    auto_offset_reset="earliest",
    enable_auto_commit=False,
    consumer_timeout_ms=5000,
)

In [None]:
# Drain the topic: keep polling until a poll returns no messages, sorting
# each decoded event by its "action" field.

# Accumulators are created here so the cell runs on a fresh kernel
# (previously they were never defined, which raised NameError on the first
# append).
records = []    # every decoded event, regardless of action
inserts = []    # events whose action is "insert"
deletions = []  # events whose action is "delete"

# Poll with a timeout (in milliseconds)
timeout_ms = 5000
while True:
    # Poll for messages
    message_batch = consumer_topic.poll(timeout_ms=timeout_ms)

    # If no messages received, break the loop
    if not message_batch:
        break

    # Process the messages; the batch maps each partition to its list of messages
    for partition_messages in message_batch.values():
        for message in partition_messages:
            decoded = json.loads(message.value.decode("utf-8"))
            if decoded["action"] == "insert":
                inserts.append(decoded)
            elif decoded["action"] == "delete":
                deletions.append(decoded)
            else:
                # Any other action is echoed for inspection
                print(decoded)
            records.append(decoded)
consumer_topic.close()

In [None]:
# Summarise how many events were consumed, overall and per action.
for label, items in (
    ("number of records:", records),
    ("number of deletions:", deletions),
    ("number of insertions:", inserts),
):
    print(label, len(items))

In [15]:
def quick_topic_count(topic_name, bootstrap_servers=["kafka-broker:9092"]):
    """Return the total offset span of a topic without consuming any messages.

    For every partition of ``topic_name`` the beginning offset is subtracted
    from the end offset, and the differences are summed.

    Args:
        topic_name: Name of the Kafka topic to measure.
        bootstrap_servers: Broker address(es) passed straight through to
            ``KafkaConsumer``. The default list is never mutated here.

    Returns:
        The summed ``end - beginning`` offsets over all partitions, or ``0``
        when the consumer reports no partitions for the topic.

    NOTE(review): this is an offset difference, which presumably matches the
    message count only when every offset in the range still holds a record —
    confirm for compacted or transactional topics.
    """
    consumer = KafkaConsumer(bootstrap_servers=bootstrap_servers)
    try:
        # Get partition information
        partitions = consumer.partitions_for_topic(topic_name)
        if not partitions:
            return 0

        # Create TopicPartition objects
        tps = [TopicPartition(topic_name, p) for p in partitions]

        # Latest and earliest offsets per partition
        end_offsets = consumer.end_offsets(tps)
        beginning_offsets = consumer.beginning_offsets(tps)

        return sum(end_offsets[tp] - beginning_offsets[tp] for tp in tps)
    finally:
        # Always release the connection, including on the early return above
        # and when an offset lookup raises.
        consumer.close()

In [None]:
# Offset-based size of the CDC topic, shown as the cell output.
quick_topic_count(topic_name="cdc")