# Kafka Message Generation

In [1]:
from confluent_kafka import Producer
import pandas as pd
import json
import time
import os

In [3]:
# Load the raw customer CSV into a DataFrame, then display its column names.
csv_path = 'D:/GitHub/Data Engineer/data/raw/customers_100.csv'
df = pd.read_csv(csv_path)

df.columns

Index(['customer_id', 'name', 'city', 'state', 'country', 'registration_date',
       'is_active'],
      dtype='object')

### Convert the local data file into a JSON file to stream to Kafka

In [4]:
# Turn each DataFrame row into a dict and persist the resulting list as
# pretty-printed JSON so it can be streamed later.
json_file = 'customers.json'
json_detail = df.to_dict(orient='records')

with open(json_file, 'w') as handle:
    json.dump(json_detail, fp=handle, indent=4)
    print('file converted')

file converted


### Configuring the Python client for Kafka

In [5]:
def read_config(path="client.properties"):
    """Read a client configuration from a ``key=value`` properties file.

    Blank lines and lines whose first non-blank character is ``#`` are
    skipped. Every other line is split on the first ``=`` only, so a value
    may itself contain ``=``. Whitespace around both the key and the value
    is removed, which means ``key = value`` and ``key=value`` are equivalent.

    Args:
        path: Location of the properties file. Defaults to
            ``client.properties`` in the current working directory.

    Returns:
        A dict mapping each parameter name to its (string) value.

    Raises:
        ValueError: If a non-blank, non-comment line has no ``=`` separator.
    """
    config = {}
    with open(path) as fh:
        for line_number, raw_line in enumerate(fh, start=1):
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            parameter, separator, value = line.partition("=")
            if not separator:
                # Fail loudly with the offending location instead of an
                # opaque tuple-unpacking error.
                raise ValueError(
                    f"{path}: line {line_number}: expected 'key=value', got {line!r}"
                )
            # Strip the key as well: a trailing space would otherwise become
            # part of the parameter name and the setting would not match.
            config[parameter.strip()] = value.strip()
    return config

# Load the client settings from client.properties and create the Kafka
# producer from them.
config = read_config()
producer = Producer(config)

### Converting a dictionary record into bytes

In [6]:
topic = 'ecommerce'

# Reload the records that were written to customers.json.
with open('customers.json', 'r') as file:
    customers_data = json.load(file)

# Use the first record as a sample message, keyed by its customer id.
value = customers_data[0]
key = value['customer_id']
print(key, value)

# Kafka carries raw bytes. Serialize the value with json.dumps rather than
# str(): str() yields the Python repr (single quotes, False/None), which a
# consumer cannot parse as JSON.
value = json.dumps(value).encode('utf-8')
key = str(key).encode('utf-8')

0 {'customer_id': 0, 'name': 'Customer_0', 'city': 'Pune', 'state': 'Maharashtra', 'country': 'India', 'registration_date': '2023-06-29', 'is_active': False}


### Sending a single record as a key-value pair to Kafka (streaming)

In [7]:
# Hand the sample key/value pair to the producer for the 'ecommerce' topic.
# NOTE(review): no delivery callback, poll() or flush() is used here, so this
# cell does not itself confirm delivery — presumably that only happens when
# producer.flush() runs in the batch cell; confirm.
producer.produce(topic, key=key, value=value)

### Sending multiple messages to the Kafka cluster

In [None]:
topic = 'ecommerce'


def deliver_status(err, msg):
    """Print the outcome of a single produced message.

    Args:
        err: Falsy when the message was delivered; otherwise the error to
            report.
        msg: The message object; its ``topic()``, ``partition()`` and
            ``offset()`` are printed on success.
    """
    if not err:
        print(f'Message delivered to {msg.topic()} [{msg.partition()}] at offset [{msg.offset()}]')
        return
    print(f'Message delivery failed: {err}')

# Stream every customer record to Kafka as a JSON value. The key is the
# record's customer_id plus 10, encoded as UTF-8 text.
send_errors = 0        # records whose serialization or produce() call raised
delivery_errors = []   # errors reported through the delivery callback


def _track_delivery(err, msg):
    """Remember failed deliveries, then delegate to deliver_status to print."""
    if err is not None:
        delivery_errors.append(err)
    deliver_status(err, msg)


for record in customers_data:
    try:
        message_value = json.dumps(record)
        message_key = str(int(record['customer_id']) + 10).encode('utf-8')

        producer.produce(topic, key=message_key, value=message_value, callback=_track_delivery)
        # Serve any delivery callbacks that are already available without
        # blocking; a non-zero timeout can stall the loop on every record.
        producer.poll(0)

    except Exception as e:
        send_errors += 1
        print(f'Error sending messages: {e}')

# Wait for the outstanding messages; flush() returns how many are still queued.
undelivered = producer.flush()

# Only claim success when nothing failed along the way.
if send_errors or delivery_errors or undelivered:
    print(
        'Message send to KAFKA Cluster finished with errors: '
        f'{send_errors} not sent, {len(delivery_errors)} failed delivery, '
        f'{undelivered} still queued'
    )
else:
    print('Message send to KAFKA Cluster Successfully')