In [27]:
# import statements
from time import sleep
from json import dumps
from kafka import KafkaProducer
import random
import datetime as dt
import pandas as pd
import json


# read the hotspot_TERRA_streaming.csv file
hotspotTERRA = pd.read_csv("hotspot_TERRA_streaming.csv")

# change to json object 
result_list = hotspotTERRA.to_dict(orient = "records")

In [28]:
# sample data
result_list[1]

{'latitude': -35.541,
 'longitude': 143.311,
 'confidence': 82,
 'surface_temperature_celcius': 63}

In [29]:
def publish_message(producer_instance, topic_name, key, data):
    try:
        key_bytes = bytes(key, encoding='utf-8')
        producer_instance.send(topic_name, key=key_bytes, value=data)
        producer_instance.flush()
        print('Message published successfully. Data: ' + str(data))
    except Exception as ex:
        print('Exception in publishing message.')
        print(str(ex))

In [30]:
def connect_kafka_producer():
    _producer = None
    try:
        _producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                                  value_serializer=lambda x:dumps(x).encode('ascii'),
                                  api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
    finally:
        return _producer

In [31]:
if __name__ == '__main__':
   
    topic = 'partB'
    
    # latest date from climate data in Part A Task 2. Change to datetime type
    latestDate_str = "2021-12-31"
    latestDate = dt.datetime.strptime(latestDate_str, "%Y-%m-%d")
    
    # get kafka producer instance
    print('Publishing records..')
    producer = connect_kafka_producer()
    
    while True: 
        
        # get index of data from result_list randomly
        index = random.randrange(0,len(result_list))
        # get the data fron the list
        data = result_list[index]
        
        # append 24/5 hours from the latest date and store date and time into the selected data
        latestDate = latestDate + dt.timedelta(seconds=24*60*60/5)
        data["date"] = latestDate.strftime("%d/%m/%Y")
        data["time"] = latestDate.strftime("%X")
        
        # add producer information
        data["producer"] = 3
        
        # publish data every 2 seconds
        publish_message(producer, topic, 'hotspotTERRA', data)
        sleep(2)

Publishing records..
Message published successfully. Data: {'latitude': -37.9071, 'longitude': 143.538, 'confidence': 100, 'surface_temperature_celcius': 88, 'date': '31/12/2021', 'time': '04:48:00', 'producer': 3}
Message published successfully. Data: {'latitude': -36.4142, 'longitude': 143.1077, 'confidence': 100, 'surface_temperature_celcius': 97, 'date': '31/12/2021', 'time': '09:36:00', 'producer': 3}
Message published successfully. Data: {'latitude': -36.2829, 'longitude': 145.825, 'confidence': 100, 'surface_temperature_celcius': 115, 'date': '31/12/2021', 'time': '14:24:00', 'producer': 3}
Message published successfully. Data: {'latitude': -36.4208, 'longitude': 145.5629, 'confidence': 75, 'surface_temperature_celcius': 48, 'date': '31/12/2021', 'time': '19:12:00', 'producer': 3}
Message published successfully. Data: {'latitude': -37.4336, 'longitude': 147.0534, 'confidence': 60, 'surface_temperature_celcius': 51, 'date': '01/01/2022', 'time': '00:00:00', 'producer': 3}
Message

Message published successfully. Data: {'latitude': -36.3045, 'longitude': 141.0771, 'confidence': 74, 'surface_temperature_celcius': 48, 'date': '08/01/2022', 'time': '19:12:00', 'producer': 3}
Message published successfully. Data: {'latitude': -37.3847, 'longitude': 142.8935, 'confidence': 100, 'surface_temperature_celcius': 88, 'date': '09/01/2022', 'time': '00:00:00', 'producer': 3}
Message published successfully. Data: {'latitude': -36.8004, 'longitude': 142.4415, 'confidence': 66, 'surface_temperature_celcius': 52, 'date': '09/01/2022', 'time': '04:48:00', 'producer': 3}
Message published successfully. Data: {'latitude': -36.275, 'longitude': 142.785, 'confidence': 72, 'surface_temperature_celcius': 45, 'date': '09/01/2022', 'time': '09:36:00', 'producer': 3}
Message published successfully. Data: {'latitude': -36.0411, 'longitude': 141.7071, 'confidence': 69, 'surface_temperature_celcius': 44, 'date': '09/01/2022', 'time': '14:24:00', 'producer': 3}
Message published successfully.

Message published successfully. Data: {'latitude': -36.6952, 'longitude': 144.7228, 'confidence': 84, 'surface_temperature_celcius': 57, 'date': '17/01/2022', 'time': '09:36:00', 'producer': 3}
Message published successfully. Data: {'latitude': -37.3466, 'longitude': 143.123, 'confidence': 66, 'surface_temperature_celcius': 43, 'date': '17/01/2022', 'time': '14:24:00', 'producer': 3}
Message published successfully. Data: {'latitude': -37.8254, 'longitude': 143.3904, 'confidence': 62, 'surface_temperature_celcius': 43, 'date': '17/01/2022', 'time': '19:12:00', 'producer': 3}
Message published successfully. Data: {'latitude': -38.0505, 'longitude': 142.4791, 'confidence': 57, 'surface_temperature_celcius': 40, 'date': '18/01/2022', 'time': '00:00:00', 'producer': 3}
Message published successfully. Data: {'latitude': -37.8701, 'longitude': 142.8066, 'confidence': 78, 'surface_temperature_celcius': 51, 'date': '18/01/2022', 'time': '04:48:00', 'producer': 3}
Message published successfully.

KeyboardInterrupt: 