## FIT3182 - Assignment 2
---
### Part B - Producer 2

**Information:**
- Filename: Assignment_PartB_Producer2.ipynb
- Student Name: Nicholas Mandylas
- Student Number: 27840328
- Student Email: nman48@student.monash.edu

In [1]:
from time import sleep
from json import dumps
from kafka import KafkaProducer
import random
import datetime as dt
import pandas

# Reading data from CSV
def readCSV():
    climate_streaming_data = pandas.read_csv('hotspot_AQUA_streaming.csv')
    streaming_data = []
    for _, row in climate_streaming_data.iterrows(): # Iterate through each row in the CSV
        data_point = {} # Create dictionary for individual row & format data appropriately.
        data_point['latitude'] = float(row['latitude'])
        data_point['longitude'] = float(row['longitude'])
        data_point['confidence'] = float(row['confidence'])
        data_point['surface_temperature_celcius'] = float(
            row['surface_temperature_celcius'])

        streaming_data.append(data_point)

    return streaming_data


def publish_message(producer_instance, topic_name, data):
    try:
        producer_instance.send(topic_name, value=data)
        producer_instance.flush()
        print('Message published successfully. Data: ' + str(data))
    except Exception as ex:
        print('Exception in publishing message.')
        print(str(ex))


def connect_kafka_producer():
    _producer = None
    try:
        _producer = KafkaProducer( # Added serializer on the producer, which will automatically serialize to JSON string format.
            bootstrap_servers=['localhost:9092'], value_serializer=lambda x: dumps(x).encode('ascii'), api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
    finally:
        return _producer


if __name__ == '__main__':

    data = readCSV() # Read all data from CSV at one time.
    topic = 'Hotspot_AQUA'
    producer = connect_kafka_producer()
    created_date = dt.datetime(2019, 1, 1) # Initial date

    count = 0

    while True:
        count += 4

        random_number = random.randrange(0, len(data)) # Pick random aqua data point.
        selected_data = data[random_number]

        if count > 16: # As data stream is expected to be sent every 2 seconds (10 seconds total for a single day), iterate up to 6, counting by 4.
            created_date += dt.timedelta(days=1) # Set to next day & reset time back to 0 hours, 0 minutes & 0 seconds.
            created_date.replace(hour=0, minute=0, second=0)
            count = 0

        # Every two seconds, I select a random hour (in the space of 4 hours), random minutes & seconds, to make a more simulated flow of data.
        # As the count increases (as it does 4), the next two seconds, will be a random time, keeping the output in sequential & random order.
        created_time = created_date + dt.timedelta(
            hours=(random.randrange(count - 4, count)),
            minutes=(random.randrange(0, 60)),
            seconds=(random.randrange(0, 60)))
        selected_data['created_time'] = created_time.isoformat()
        # print(selected_data['created_time'].strftime("%m/%d/%Y, %H:%M:%S"))
        selected_data['producer_id'] = 'producer_hotspot_aqua'

        publish_message(producer, topic, selected_data)

        sleep(2)


Message published successfully. Data: {'latitude': -36.5039, 'longitude': 142.8384, 'confidence': 74.0, 'surface_temperature_celcius': 48.0, 'created_time': '2019-01-01T03:26:15', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. Data: {'latitude': -37.7105, 'longitude': 148.6047, 'confidence': 77.0, 'surface_temperature_celcius': 56.0, 'created_time': '2019-01-01T06:01:54', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. Data: {'latitude': -37.976, 'longitude': 145.649, 'confidence': 88.0, 'surface_temperature_celcius': 64.0, 'created_time': '2019-01-01T11:15:59', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. Data: {'latitude': -36.599, 'longitude': 144.6498, 'confidence': 76.0, 'surface_temperature_celcius': 49.0, 'created_time': '2019-01-01T12:27:54', 'producer_id': 'producer_hotspot_aqua'}
Message published successfully. Data: {'latitude': -36.6672, 'longitude': 143.8631, 'confidence': 79.0, 'surface_temperatur

KeyboardInterrupt: 