In [80]:
# import statements
from time import sleep
from json import dumps
from kafka import KafkaProducer
import random
import datetime as dt
import pandas as pd
import json

# read the climate_streaming.csv file
climateData = pd.read_csv("climate_streaming.csv")

# change to json object 
result_list = climateData.to_dict(orient = "records")


In [81]:
# sample data
result_list[1]

{'latitude': -38.038000000000004,
 'longitude': 142.986,
 'air_temperature_celcius': 15,
 'relative_humidity': 50.7,
 'windspeed_knots': 9.2,
 'max_wind_speed': 13.0,
 'precipitation ': ' 0.02G',
 'GHI_w/m2': 128}

In [82]:
def publish_message(producer_instance, topic_name, key, data):
    try:
        key_bytes = bytes(key, encoding='utf-8')
        producer_instance.send(topic_name, key=key_bytes, value=data)
        producer_instance.flush()
        print('Message published successfully. Data: ' + str(data))
    except Exception as ex:
        print('Exception in publishing message.')
        print(str(ex))

In [83]:
def connect_kafka_producer():
    _producer = None
    try:
        _producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                                  value_serializer=lambda x:dumps(x).encode('ascii'),
                                  api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
    finally:
        return _producer

In [84]:
if __name__ == '__main__':
   
    topic = 'partB'
    
    # latest date from climate data in Part A Task 2. Change to datetime type
    latestDate_str = "2021-12-31"
    latestDate = dt.datetime.strptime(latestDate_str, "%Y-%m-%d")
    
    # get kafka producer instance
    print('Publishing records..')
    producer = connect_kafka_producer()
    
    while True:
    
        # get index of data from result_list randomly
        index = random.randrange(0,len(result_list))
        # get the data fron the list
        data = result_list[index]
        
        # append 1 day from the latest date and store date and time into the selected data
        latestDate = latestDate + dt.timedelta(days=1)
        data["date"] = latestDate.strftime("%d/%m/%Y")
        data["time"] = dt.datetime.now().strftime("%X")
        
        # add station key value to the data 
        data["station"] = "948700"
        # add producer information 
        data["producer"] = 1
        
        # publish data every 10 seconds
        publish_message(producer, topic, 'Climate', data)
        sleep(10)
        
    

Publishing records..
Message published successfully. Data: {'latitude': -36.827, 'longitude': 142.5446, 'air_temperature_celcius': 8, 'relative_humidity': 39.3, 'windspeed_knots': 4.7, 'max_wind_speed': 13.0, 'precipitation ': ' 0.02G', 'GHI_w/m2': 75, 'date': '01/01/2022', 'time': '04:18:50', 'station': '948700', 'producer': 1}
Message published successfully. Data: {'latitude': -37.459, 'longitude': 148.092, 'air_temperature_celcius': 13, 'relative_humidity': 48.5, 'windspeed_knots': 7.1, 'max_wind_speed': 15.9, 'precipitation ': ' 0.00G', 'GHI_w/m2': 113, 'date': '02/01/2022', 'time': '04:19:00', 'station': '948700', 'producer': 1}
Message published successfully. Data: {'latitude': -36.9194, 'longitude': 143.6131, 'air_temperature_celcius': 19, 'relative_humidity': 50.3, 'windspeed_knots': 7.7, 'max_wind_speed': 11.1, 'precipitation ': ' 0.00I', 'GHI_w/m2': 162, 'date': '03/01/2022', 'time': '04:19:10', 'station': '948700', 'producer': 1}
Message published successfully. Data: {'latit

KeyboardInterrupt: 