In [30]:
# import statements
from time import sleep
from json import dumps
from kafka import KafkaProducer
import random
import datetime as dt
import pandas as pd
import json


# Load the hotspot AQUA streaming CSV that this producer replays.
hotspotAQUA = pd.read_csv("hotspot_AQUA_streaming.csv")

# Convert the frame into a list holding one plain dict per CSV row, so that
# each record can be picked and sent individually later on.
result_list = hotspotAQUA.to_dict("records")

In [31]:
# Display one record (index 1) to check the dict layout of each row
# before any of them are published.
result_list[1]

{'latitude': -38.038,
 'longitude': 142.986,
 'confidence': 65,
 'surface_temperature_celcius': 40}

In [32]:
def publish_message(producer_instance, topic_name, key, data):
    """Send one keyed record to a Kafka topic and report the outcome on stdout.

    Args:
        producer_instance: Producer object exposing ``send(topic, key=, value=)``
            and ``flush()``; ``send`` must return a future with ``get()``.
        topic_name: Name of the topic to publish to.
        key: Message key as ``str``; it is UTF-8 encoded before sending.
        data: Message payload, passed to the producer unchanged.

    Returns:
        None. Any failure (bad key, missing producer, delivery error) is
        caught and printed instead of being raised, so a caller's publishing
        loop keeps running.
    """
    try:
        key_bytes = bytes(key, encoding='utf-8')
        # send() only queues the record and hands back a future.
        pending = producer_instance.send(topic_name, key=key_bytes, value=data)
        producer_instance.flush()
        # flush() waits for the request to finish but does not raise when the
        # delivery failed; resolving the future does, so success is only
        # reported for records the broker actually accepted.
        pending.get(timeout=10)
        print('Message published successfully. Data: ' + str(data))
    except Exception as ex:
        print('Exception in publishing message.')
        print(str(ex))

In [33]:
def connect_kafka_producer(bootstrap_servers=None):
    """Create a KafkaProducer that JSON-encodes every message value.

    Args:
        bootstrap_servers: Optional list of ``host:port`` broker addresses.
            Defaults to ``['localhost:9092']`` when omitted.

    Returns:
        The connected ``KafkaProducer``, or ``None`` if creating it raised an
        ``Exception`` (the error is printed). Callers must check for ``None``.
    """
    if bootstrap_servers is None:
        bootstrap_servers = ['localhost:9092']
    # The return statements live in the try/except branches rather than in a
    # ``finally`` clause: returning from ``finally`` would silently discard
    # KeyboardInterrupt / SystemExit raised while connecting.
    try:
        return KafkaProducer(bootstrap_servers=bootstrap_servers,
                             # Values are dumped to JSON text, then to bytes.
                             value_serializer=lambda x: dumps(x).encode('ascii'),
                             api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
        return None

In [34]:
if __name__ == '__main__':

    topic = 'partB'

    # Latest date from the climate data in Part A Task 2, parsed to a datetime.
    # The simulated timestamps below advance from this point.
    latestDate_str = "2021-12-31"
    latestDate = dt.datetime.strptime(latestDate_str, "%Y-%m-%d")

    # get kafka producer instance
    print('Publishing records..')
    producer = connect_kafka_producer()

    # connect_kafka_producer() returns None when the connection fails; stop
    # here instead of looping forever with every publish attempt failing.
    if producer is None:
        raise RuntimeError('Kafka producer is unavailable; nothing can be published.')
    if not result_list:
        raise ValueError('result_list is empty; there are no records to publish.')

    try:
        while True:

            # Pick a random record and copy it, so the fields added below do
            # not leak back into the shared result_list entries.
            data = dict(random.choice(result_list))

            # Advance the simulated clock by 24/5 hours (4h48m) and stamp the
            # record with the new date and time.
            latestDate = latestDate + dt.timedelta(seconds=24*60*60/5)
            data["date"] = latestDate.strftime("%d/%m/%Y")
            # Explicit format rather than the locale-dependent "%X", so the
            # consumer always receives HH:MM:SS.
            data["time"] = latestDate.strftime("%H:%M:%S")

            # add producer information
            data["producer"] = 2

            # publish data every 2 seconds
            publish_message(producer, topic, 'hotspotAQUA', data)
            sleep(2)
    finally:
        # The loop only ends through an interrupt or error; release the
        # producer's connections either way.
        producer.close()

Publishing records..
Message published successfully. Data: {'latitude': -37.1248, 'longitude': 141.8795, 'confidence': 71, 'surface_temperature_celcius': 52, 'date': '31/12/2021', 'time': '04:48:00', 'producer': 2}
Message published successfully. Data: {'latitude': -35.962, 'longitude': 143.791, 'confidence': 76, 'surface_temperature_celcius': 42, 'date': '31/12/2021', 'time': '09:36:00', 'producer': 2}
Message published successfully. Data: {'latitude': -36.7308, 'longitude': 144.0007, 'confidence': 80, 'surface_temperature_celcius': 54, 'date': '31/12/2021', 'time': '14:24:00', 'producer': 2}
Message published successfully. Data: {'latitude': -37.0135, 'longitude': 143.5822, 'confidence': 69, 'surface_temperature_celcius': 44, 'date': '31/12/2021', 'time': '19:12:00', 'producer': 2}
Message published successfully. Data: {'latitude': -37.7943, 'longitude': 143.0545, 'confidence': 70, 'surface_temperature_celcius': 45, 'date': '01/01/2022', 'time': '00:00:00', 'producer': 2}
Message pub

Message published successfully. Data: {'latitude': -36.2993, 'longitude': 143.2461, 'confidence': 76, 'surface_temperature_celcius': 50, 'date': '08/01/2022', 'time': '19:12:00', 'producer': 2}
Message published successfully. Data: {'latitude': -35.2464, 'longitude': 141.1143, 'confidence': 98, 'surface_temperature_celcius': 45, 'date': '09/01/2022', 'time': '00:00:00', 'producer': 2}
Message published successfully. Data: {'latitude': -36.2929, 'longitude': 144.1411, 'confidence': 71, 'surface_temperature_celcius': 46, 'date': '09/01/2022', 'time': '04:48:00', 'producer': 2}
Message published successfully. Data: {'latitude': -37.394, 'longitude': 143.5541, 'confidence': 83, 'surface_temperature_celcius': 64, 'date': '09/01/2022', 'time': '09:36:00', 'producer': 2}
Message published successfully. Data: {'latitude': -38.0132, 'longitude': 143.2528, 'confidence': 68, 'surface_temperature_celcius': 44, 'date': '09/01/2022', 'time': '14:24:00', 'producer': 2}
Message published successfully.

Message published successfully. Data: {'latitude': -35.7609, 'longitude': 143.8442, 'confidence': 82, 'surface_temperature_celcius': 55, 'date': '17/01/2022', 'time': '09:36:00', 'producer': 2}
Message published successfully. Data: {'latitude': -36.0047, 'longitude': 146.4774, 'confidence': 64, 'surface_temperature_celcius': 42, 'date': '17/01/2022', 'time': '14:24:00', 'producer': 2}
Message published successfully. Data: {'latitude': -35.364, 'longitude': 141.063, 'confidence': 72, 'surface_temperature_celcius': 54, 'date': '17/01/2022', 'time': '19:12:00', 'producer': 2}
Message published successfully. Data: {'latitude': -35.2378, 'longitude': 142.9864, 'confidence': 100, 'surface_temperature_celcius': 94, 'date': '18/01/2022', 'time': '00:00:00', 'producer': 2}
Message published successfully. Data: {'latitude': -36.3313, 'longitude': 147.6089, 'confidence': 74, 'surface_temperature_celcius': 52, 'date': '18/01/2022', 'time': '04:48:00', 'producer': 2}
Message published successfully.

KeyboardInterrupt: 