## FIT3182 - Assignment 2
---
### Part B - Producer 3

**Information:**
- Filename: Assignment_PartB_Producer3.ipynb
- Student Name: Nicholas Mandylas
- Student Number: 27840328
- Student Email: nman48@student.monash.edu

In [1]:
from time import sleep
from json import dumps
from kafka import KafkaProducer
import random
import datetime as dt
import pandas


def readCSV():
    climate_streaming_data = pandas.read_csv('hotspot_TERRA_streaming.csv')
    streaming_data = []
    for _, row in climate_streaming_data.iterrows(): # Iterate through each row in the CSV
        data_point = {} # Create dictionary for individual row & format data appropriately.
        data_point['latitude'] = float(row['latitude'])
        data_point['longitude'] = float(row['longitude'])
        data_point['confidence'] = float(row['confidence'])
        data_point['surface_temperature_celcius'] = float(
            row['surface_temperature_celcius'])

        streaming_data.append(data_point)

    return streaming_data


def publish_message(producer_instance, topic_name, data):
    try:
        producer_instance.send(topic_name, value=data)
        producer_instance.flush()
        print('Message published successfully. Data: ' + str(data))
    except Exception as ex:
        print('Exception in publishing message.')
        print(str(ex))


def connect_kafka_producer():
    _producer = None
    try:
        _producer = KafkaProducer( # Added serializer on the producer, which will automatically serialize to JSON string format.
            bootstrap_servers=['localhost:9092'], value_serializer=lambda x: dumps(x).encode('ascii'), api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
    finally:
        return _producer


if __name__ == '__main__':

    data = readCSV()
    topic = 'Hotspot_TERRA'
    producer = connect_kafka_producer()
    created_date = dt.datetime(2019, 1, 1)

    count = 0

    while True:
        count += 4

        random_number = random.randrange(0, len(data))
        selected_data = data[random_number]

        if count > 16: # As data stream is expected to be sent every 2 seconds (10 seconds total for a single day), iterate up to 6, counting by 4.
            created_date += dt.timedelta(days=1) # Set to next day & reset time back to 0 hours, 0 minutes & 0 seconds.
            created_date.replace(hour=0, minute=0, second=0)
            count = 0

        # Every two seconds, I select a random hour (in the space of 4 hours), random minutes & seconds, to make a more simulated flow of data.
        # As the count increases (as it does 4), the next two seconds, will be a random time, keeping the output in sequential & random order.
        created_time = created_date + dt.timedelta(
            hours=(random.randrange(count - 4, count)),
            minutes=(random.randrange(0, 60)),
            seconds=(random.randrange(0, 60)))
        selected_data['created_time'] = created_time.isoformat()
        # print(selected_data['created_time'].strftime("%m/%d/%Y, %H:%M:%S"))
        selected_data['producer_id'] = 'producer_hotspot_terra'

        publish_message(producer, topic, selected_data)

        sleep(2)


Message published successfully. Data: {'latitude': -36.494, 'longitude': 145.4522, 'confidence': 63.0, 'surface_temperature_celcius': 41.0, 'created_time': '2019-01-01T03:11:51', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. Data: {'latitude': -37.166, 'longitude': 148.9436, 'confidence': 75.0, 'surface_temperature_celcius': 49.0, 'created_time': '2019-01-01T06:33:04', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. Data: {'latitude': -34.8404, 'longitude': 143.1806, 'confidence': 78.0, 'surface_temperature_celcius': 55.0, 'created_time': '2019-01-01T11:06:09', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. Data: {'latitude': -38.294, 'longitude': 143.7227, 'confidence': 60.0, 'surface_temperature_celcius': 41.0, 'created_time': '2019-01-01T14:21:12', 'producer_id': 'producer_hotspot_terra'}
Message published successfully. Data: {'latitude': -36.5871, 'longitude': 144.961, 'confidence': 86.0, 'surface_tempera