In [None]:
%pip install kafka-python-ng

In [None]:
# Addresses (host:port) of the two local Kafka brokers.
bootstrap_servers = [
    'localhost:29092',
    'localhost:29093',
]

Create Topic

In [None]:
from kafka.admin import KafkaAdminClient, NewTopic

def create_topic(topic_name, num_partitions, replication_factor, bootstrap_servers=None):
    """Create a single Kafka topic through the admin client.

    Args:
        topic_name: Name of the topic to create.
        num_partitions: Partition count passed to ``NewTopic``.
        replication_factor: Replication factor passed to ``NewTopic``.
        bootstrap_servers: Optional broker address(es) handed to
            ``KafkaAdminClient``. When omitted, the two local brokers
            ``localhost:29092`` and ``localhost:29093`` are used.

    Raises:
        Any exception raised by ``KafkaAdminClient`` or ``create_topics``
        propagates to the caller; nothing is caught here.
    """
    if bootstrap_servers is None:
        bootstrap_servers = ['localhost:29092', 'localhost:29093']

    admin_client = KafkaAdminClient(
        bootstrap_servers=bootstrap_servers,
        client_id='test_client'
    )
    try:
        new_topic = NewTopic(
            name=topic_name,
            num_partitions=num_partitions,
            replication_factor=replication_factor,
        )
        admin_client.create_topics(new_topics=[new_topic], validate_only=False)
    finally:
        # Release the admin connection even when topic creation fails.
        admin_client.close()

    # Only reached when create_topics did not raise.
    print(f"Topic '{topic_name}' created successfully.")

if __name__ == "__main__":
    # Create the 'csvdata' topic: 1 partition, replication factor 2.
    create_topic(topic_name="csvdata", num_partitions=1, replication_factor=2)

List Topics

In [None]:
from kafka.admin import KafkaAdminClient, ConfigResource, ConfigResourceType
from kafka.errors import KafkaError

# Brokers passed to the list_topics call in this cell.
bootstrap_servers = [
    'localhost:29092',
    'localhost:29093',
]

# Listing Topics
def list_topics(bootstrap_servers):
    """Return the topics reported by the Kafka admin client.

    Args:
        bootstrap_servers: Broker address(es) handed to ``KafkaAdminClient``.

    Returns:
        The value of ``KafkaAdminClient.list_topics()``, or an empty list
        when a ``KafkaError`` is raised (the error is printed, not re-raised).
    """
    try:
        admin_client = KafkaAdminClient(bootstrap_servers=bootstrap_servers)
        try:
            return admin_client.list_topics()
        finally:
            # Close the connection whether or not the listing succeeded.
            admin_client.close()
    except KafkaError as e:
        print(f"Failed to list topics: {e}")
        return []
   
# Last expression in the cell, so the returned topics are displayed.
list_topics(bootstrap_servers=bootstrap_servers)

Produce Streamed Data

In [None]:
from confluent_kafka import Producer
import random
import time


def create_producer(bootstrap_servers):
    """Build a ``Producer`` configured for the given brokers.

    Args:
        bootstrap_servers: Value stored under the ``'bootstrap.servers'``
            configuration key.

    Returns:
        The object returned by ``Producer(config)``.
    """
    producer_config = {'bootstrap.servers': bootstrap_servers}
    return Producer(producer_config)


def produce_messages(producer, topic, messages, delay_range=(0.5, 1.5)):
    """Send messages one at a time with a random pause after each.

    Args:
        producer: Object exposing ``produce(topic, value)`` and ``flush()``.
        topic: Topic name passed to ``producer.produce``.
        messages: Iterable of message payloads.
        delay_range: ``(min_seconds, max_seconds)`` bounds for the random
            pause after each message. Defaults to ``(0.5, 1.5)``.
    """
    min_delay, max_delay = delay_range
    for message in messages:
        producer.produce(topic, message)
        # Flush after every message so it is sent before the pause rather
        # than sitting in the producer's queue.
        producer.flush()
        time.sleep(random.uniform(min_delay, max_delay))


if __name__ == "__main__":
    # Broker list as a single comma-separated string (the admin cells use a list).
    bootstrap_servers = ','.join(('localhost:29092', 'localhost:29093'))
    topic = 'testjava'

    # Sample sentences sent to the topic one by one.
    messages = [
        "hello world", "hello kafka", "hello kafka streams",
        "kafka streams with python", "python and kafka",
        "real-time stream processing",
    ]

    producer = create_producer(bootstrap_servers)
    produce_messages(producer, topic, messages)


Consume Streamed Data

In [None]:
from confluent_kafka import Consumer
from streamz import Stream
from collections import Counter


def create_consumer_params(bootstrap_servers, group_id):
    """Assemble the consumer configuration dictionary.

    Args:
        bootstrap_servers: Value for the ``'bootstrap.servers'`` key.
        group_id: Value for the ``'group.id'`` key.

    Returns:
        A dict with those two entries plus ``'auto.offset.reset'`` fixed
        to ``'earliest'``.
    """
    params = {}
    params['bootstrap.servers'] = bootstrap_servers
    params['group.id'] = group_id
    params['auto.offset.reset'] = 'earliest'
    return params


def consume_and_process(message_batch):
    """Count whitespace-separated words in one batch and print the tally.

    Args:
        message_batch: Iterable of objects supporting ``decode('utf-8')``.

    Returns:
        None. The counter is rebuilt on every call, so the printed counts
        cover only the batch passed in.
    """
    tally = Counter()
    for raw_message in message_batch:
        tally.update(raw_message.decode('utf-8').split())

    print("Current Word Counts:", tally)


if __name__ == "__main__":
    bootstrap_servers = 'localhost:29092,localhost:29093'
    group_id = 'wordcount-group'
    topic = 'testjava'  # Ensure topic is a string, not a list

    consumer_params = create_consumer_params(bootstrap_servers, group_id)

    # Create the source without starting it, so polling cannot begin before
    # the processing steps below are attached.
    stream = Stream.from_kafka_batched(topic, consumer_params, poll_interval='1s', start=False)

    # consume_and_process prints the counts itself; the sink just terminates
    # the pipeline and discards the (None) results.
    stream.map(consume_and_process).sink(lambda x: None)

    # Start polling exactly once, now that the pipeline is wired.
    stream.start()