## Kafka_Producer

#### Install Kafka and Zookeeper

In [None]:
# Fetch the Kafka 3.5.0 (Scala 2.12) binary distribution and unpack it into the working directory.
# archive.apache.org keeps past releases, whereas downloads.apache.org only serves current ones.
# -f makes curl fail on an HTTP error instead of saving the error page under the .tgz name.
!curl -fsSOL https://archive.apache.org/dist/kafka/3.5.0/kafka_2.12-3.5.0.tgz
!tar -xzf kafka_2.12-3.5.0.tgz

In [None]:
# Launch ZooKeeper in the background (-daemon) with the config file shipped in the extracted distribution.
print("Starting ZooKeeper service...")
!./kafka_2.12-3.5.0/bin/zookeeper-server-start.sh -daemon ./kafka_2.12-3.5.0/config/zookeeper.properties

# Launch the Kafka broker in the background (-daemon) with the shipped server.properties.
print("Starting Kafka service...")
!./kafka_2.12-3.5.0/bin/kafka-server-start.sh -daemon ./kafka_2.12-3.5.0/config/server.properties

# Fixed 10-second pause before inspecting the process list.
print("Waiting for 10 secs until Kafka and ZooKeeper services are up and running...")
!sleep 10

# Show the process-list lines that contain "kafka".
!ps -ef | grep kafka


Starting ZooKeeper service...
Starting Kafka service...
Waiting for 10 secs until Kafka and ZooKeeper services are up and running...
root        1356       1 17 18:32 ?        00:00:01 java -Xmx512M -Xms512M -server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+ExplicitGCInvokesConcurrent -XX:MaxInlineLevel=15 -Djava.awt.headless=true -Xlog:gc*:file=/content/kafka_2.12-3.5.0/bin/../logs/zookeeper-gc.log:time,tags:filecount=10,filesize=100M -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dkafka.logs.dir=/content/kafka_2.12-3.5.0/bin/../logs -Dlog4j.configuration=file:./kafka_2.12-3.5.0/bin/../config/log4j.properties -cp /content/kafka_2.12-3.5.0/bin/../libs/activation-1.1.1.jar:/content/kafka_2.12-3.5.0/bin/../libs/aopalliance-repackaged-2.6.1.jar:/content/kafka_2.12-3.5.0/bin/../libs/argparse4j-0.7.0.jar:/content/kafka_2.12-3.5.0/bin/../libs/audience-annotations-0.13.0.jar:/cont

#### Create the `yelp_reviews` topic on the Kafka broker (running in daemon mode on port 9092)

In [None]:
# Create the single-partition, replication-factor-1 topic named yelp_reviews.
# --if-not-exists keeps the cell re-runnable: without it a second run errors because the topic already exists.
!./kafka_2.12-3.5.0/bin/kafka-topics.sh --create --if-not-exists --bootstrap-server 127.0.0.1:9092 --replication-factor 1 --partitions 1 --topic yelp_reviews

Created topic yelp_reviews.


#### Describe the new topic in Kafka to verify it was created


In [None]:
# Print the topic metadata (partition count, replication factor, leader, replicas, ISR) for yelp_reviews.
!./kafka_2.12-3.5.0/bin/kafka-topics.sh --bootstrap-server 127.0.0.1:9092 --topic yelp_reviews --describe

Topic: yelp_reviews	TopicId: 3iuKdaZbRgmBHiFuu8D57w	PartitionCount: 1	ReplicationFactor: 1	Configs: 
	Topic: yelp_reviews	Partition: 0	Leader: 0	Replicas: 0	Isr: 0


#### Install OpenJDK

In [None]:
# Install the headless OpenJDK 8 JDK through apt; -qq and the stdout redirect keep the cell output quiet.
print("Installing OpenJDK 8 JDK...")
!apt-get -qq install openjdk-8-jdk-headless > /dev/null

Installing OpenJDK 8 JDK...


#### Install Kafka's client


In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel; -q suppresses the progress bar.
# The recorded run resolved to kafka-python 2.0.2; pin a version here if strict reproducibility is required.
%pip install -q kafka-python

Collecting kafka-python
  Downloading kafka_python-2.0.2-py2.py3-none-any.whl (246 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/246.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m246.5/246.5 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kafka-python
Successfully installed kafka-python-2.0.2


#### Import the packages.

In [None]:
import pandas as pd
from kafka import KafkaProducer
import json
import time
from pandas import Timestamp

In [None]:
# Load the line-delimited JSON file (one JSON object per line) into a DataFrame.
yelp_reviews = pd.read_json("small_yelp_reviews.json", lines=True)

# To try the pipeline on a handful of rows, slice first, e.g. yelp_reviews.loc[:9].

# Convert to a list with one dict per row, which is what the producer loop iterates over.
# "records" is the valid orient name; the previous value "reviews" only worked on older
# pandas through deprecated prefix matching and is rejected by current versions.
data = yelp_reviews.to_dict(orient="records")

  data = yelp_reviews.to_dict ( "reviews" )


In [None]:
# Dataset dimensions: row count and column count, taken directly from the frame's shape
num_datapoints, num_columns = yelp_reviews.shape
print("Number of datapoints: ", num_datapoints)
print("Number of columns: ", num_columns)

Number of datapoints:  1000
Number of columns:  9


In [None]:
# Kafka topic the reviews are published to
topic = "yelp_reviews"

# Throttling: pause PAUSE_SECONDS after every BATCH_SIZE messages sent
BATCH_SIZE = 10
PAUSE_SECONDS = 10

# Create a Kafka producer connected to the local broker
producer = KafkaProducer(bootstrap_servers="localhost:9092")

# Send each row to Kafka as a UTF-8 encoded JSON document
for rownum, row in enumerate(data, start=1):
  print(row)
  # Build a JSON-serializable copy rather than overwriting row['date'] in place:
  # mutating `data` would turn the timestamp into a str, so re-running this cell
  # would fail on .strftime the second time.
  payload = {**row, 'date': row['date'].strftime('%Y-%m-%d %H:%M:%S')}
  message = json.dumps(payload).encode("utf-8")

  producer.send(topic, message)
  if rownum % BATCH_SIZE == 0:
    time.sleep(PAUSE_SECONDS)

# send() only queues messages; flush blocks until everything queued has been sent
producer.flush()

##### View the data from the topic

In [None]:
# Read back the first 10 messages from yelp_reviews, the topic the producer cell writes to.
# The previous command consumed from housing_data, a topic this notebook never creates or populates.
!./kafka_2.12-3.5.0/bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic yelp_reviews --from-beginning --max-messages 10

{"longitude": -122.05, "latitude": 37.37, "housing_median_age": 27.0, "total_rooms": 3885.0, "total_bedrooms": 661.0, "population": 1537.0, "households": 606.0, "median_income": 6.6085, "median_house_value": 344700.0}
{"longitude": -118.3, "latitude": 34.26, "housing_median_age": 43.0, "total_rooms": 1510.0, "total_bedrooms": 310.0, "population": 809.0, "households": 277.0, "median_income": 3.599, "median_house_value": 176500.0}
{"longitude": -117.81, "latitude": 33.78, "housing_median_age": 27.0, "total_rooms": 3589.0, "total_bedrooms": 507.0, "population": 1484.0, "households": 495.0, "median_income": 5.7934, "median_house_value": 270500.0}
{"longitude": -118.36, "latitude": 33.82, "housing_median_age": 28.0, "total_rooms": 67.0, "total_bedrooms": 15.0, "population": 49.0, "households": 11.0, "median_income": 6.1359, "median_house_value": 330000.0}
{"longitude": -119.67, "latitude": 36.33, "housing_median_age": 19.0, "total_rooms": 1241.0, "total_bedrooms": 244.0, "population": 850.0