## Event Producer 1 
### Simulating real-time data using Apache Kafka Producer

Required library: geohash
Installation command: pip3 install geohash


First, import all required libraries. 


In [1]:
import pymongo
from pymongo import MongoClient
from datetime import datetime, date
from pprint import pprint
import geohash

Then, declare the data type of each column in the two input files.

In [2]:
# Column names grouped by the type csv2dict converts their text values to.
# Columns that appear in none of these lists are kept as plain strings.
integer_attr = [
    'air_temperature_celcius',
    'surface_temperature_celcius',
    'confidence',
]
float_attr = [
    'relative_humidity',
    'windspeed_knots',
    'max_wind_speed',
    'latitude',
    'longitude',
]
# NOTE: the name is spelled "date_arrt" because csv2dict refers to it by that name.
date_arrt = ['date']          # parsed as day/month/year
datetime_attr = ['datetime']  # parsed as an ISO-like 'YYYY-MM-DDTHH:MM:SS' timestamp

The following function reads the CSV data and transforms each record into a Python dict, using the column names from the first line as the dictionary keys. All of the record dictionaries are then collected in a list.


In [3]:
def csv2dict(file_path, delim, integer_columns=None, float_columns=None,
             date_columns=None, datetime_columns=None):
    """Read a delimited text file into a list of typed record dictionaries.

    The first non-blank line is taken as the header. Every later non-blank
    line becomes one dict keyed by the whitespace-stripped header names; blank
    lines are skipped. Each value is stripped and converted according to the
    column it belongs to (checked in this order):

    * integer columns  -> ``int``
    * float columns    -> ``float``
    * date columns     -> ``datetime`` parsed with ``'%d/%m/%Y'``
    * datetime columns -> ``datetime`` parsed with ``'%Y-%m-%dT%H:%M:%S'``
    * any other column -> the stripped string

    Args:
        file_path: Path of the file to read.
        delim: Field separator handed to ``str.split`` (quoted fields are not
            supported).
        integer_columns: Column names to convert with ``int``. Defaults to the
            notebook-level ``integer_attr`` list when ``None``.
        float_columns: Column names to convert with ``float``. Defaults to the
            notebook-level ``float_attr`` list when ``None``.
        date_columns: Column names holding ``dd/mm/YYYY`` dates. Defaults to
            the notebook-level ``date_arrt`` list when ``None``.
        datetime_columns: Column names holding ``YYYY-mm-ddTHH:MM:SS``
            timestamps. Defaults to the notebook-level ``datetime_attr`` list
            when ``None``.

    Returns:
        A list with one dict per data row, in file order. The list is empty
        when the file has no data rows.

    Raises:
        IndexError: A data row has fewer fields than the header.
        ValueError: A value cannot be converted to its column's type.
    """
    # Imported locally on purpose: a later cell runs `import datetime`, which
    # rebinds the notebook-level name `datetime` from the class to the module.
    # Re-running this function after that cell would otherwise fail on
    # `datetime.strptime`.
    from datetime import datetime as _datetime

    # Fall back to the notebook-level schema only for the groups not supplied.
    if integer_columns is None:
        integer_columns = integer_attr
    if float_columns is None:
        float_columns = float_attr
    if date_columns is None:
        date_columns = date_arrt
    if datetime_columns is None:
        datetime_columns = datetime_attr

    header_names = None
    result = []

    # The context manager guarantees the file handle is closed, even when a
    # conversion below raises part-way through the file.
    with open(file_path) as csv_file:
        for line in csv_file:
            line = line.strip()
            if len(line) == 0:
                continue

            fields = [field.strip() for field in line.split(delim)]

            # The first non-blank line carries the column names.
            if header_names is None:
                header_names = fields
                continue

            row = {}
            for i, name in enumerate(header_names):
                raw = fields[i]
                # Re-cast the string to the type declared for this column.
                if name in integer_columns:
                    row[name] = int(raw)
                elif name in float_columns:
                    row[name] = float(raw)
                elif name in date_columns:
                    row[name] = _datetime.strptime(raw, '%d/%m/%Y')
                elif name in datetime_columns:
                    row[name] = _datetime.strptime(raw, '%Y-%m-%dT%H:%M:%S')
                else:
                    row[name] = raw

            result.append(row)

    return result

In [4]:
# Parse the climate CSV once; despite the "_dict" suffix the result is a
# list holding one dict per CSV row.
climate_streaming = "./climate_streaming.csv"
climate_streaming_dict = csv2dict(file_path=climate_streaming, delim=',')

In [5]:
# Tag each climate record with this producer's id and a precision-5 geohash
# computed from the record's latitude/longitude.
for item in climate_streaming_dict:
    item["sender_id"] = "1"
    item["geo_hash"] = geohash.encode(
        item["latitude"],
        item["longitude"],
        precision=5,
    )
    

#### Main Function

In [None]:
# import statements
from time import sleep
import json
from kafka import KafkaProducer
import random
import datetime


# Alias for the enriched climate records; the publishing loop below picks
# random entries from this list.
lines = climate_streaming_dict

def publish_message(producer_instance, topic_name, key, value):
    """Send one key/value message to a Kafka topic and wait for delivery.

    Both ``key`` and ``value`` are UTF-8 encoded before sending, and the
    producer is flushed immediately so the message is not left buffered.
    This is best-effort: any failure is printed and swallowed so that a
    publishing loop calling this function keeps running.

    Args:
        producer_instance: Object exposing ``send(topic, key=..., value=...)``
            and ``flush()`` (a ``KafkaProducer`` in this notebook).
        topic_name: Name of the topic to publish to.
        key: Message key as ``str``.
        value: Message payload as ``str`` (already serialised, e.g. JSON).

    Returns:
        None.
    """
    try:
        key_bytes = bytes(key, encoding='utf-8')
        value_bytes = bytes(value, encoding='utf-8')
        producer_instance.send(topic_name, key=key_bytes, value=value_bytes)
        producer_instance.flush()
        # Log the payload that was actually sent. Reading a global `line`
        # here would raise NameError whenever the caller's variable has a
        # different name, and would misreport a delivered message as failed.
        print('Message published successfully. Data: ' + value)
    except Exception as ex:
        print('Exception in publishing message.')
        print(str(ex))
        
def connect_kafka_producer(bootstrap_servers=None):
    """Create a ``KafkaProducer``, returning ``None`` if the connection fails.

    Args:
        bootstrap_servers: Broker address list passed to ``KafkaProducer``.
            Defaults to ``['127.0.0.1:9092']`` when ``None``.

    Returns:
        The connected ``KafkaProducer``, or ``None`` when construction raised
        an ``Exception`` (the error is printed). Callers must check for
        ``None`` before publishing.
    """
    if bootstrap_servers is None:
        bootstrap_servers = ['127.0.0.1:9092']

    _producer = None
    try:
        _producer = KafkaProducer(bootstrap_servers=bootstrap_servers,
                                  api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
    # Return after the try statement rather than from a `finally` clause:
    # a `return` inside `finally` silently discards any in-flight exception,
    # including KeyboardInterrupt and SystemExit.
    return _producer
    
# Entry point: publish one randomly chosen climate record to the 'fire'
# topic every 5 seconds until the cell is interrupted.
if __name__ == '__main__':

    topic = 'fire'

    print('Publishing records..')
    producer = connect_kafka_producer()

    if producer is None:
        # The connection error was already printed; looping without a
        # producer would only emit a failure message every 5 seconds forever.
        print('No Kafka producer available; nothing will be published.')
    elif not lines:
        # Guard the empty case explicitly instead of crashing on the random pick.
        print('No records loaded; nothing to publish.')
    else:
        try:
            while True:
                # Work on a copy so the timestamp is not written back into the
                # shared source records.
                line = dict(random.choice(lines))
                line['created_time'] = str(datetime.datetime.now())
                publish_message(producer, topic, 'parsed', json.dumps(line))
                sleep(5)
        except KeyboardInterrupt:
            # Interrupting the kernel is the normal way to stop this loop.
            print('Stopped publishing.')
        finally:
            # Release the producer's network resources however the loop ends.
            producer.close()

Publishing records..
Message published successfully. Data: {"windspeed_knots": 9.5, "longitude": 143.1062, "air_temperature_celcius": 17, "created_time": "2019-05-24 15:13:21.257087", "max_wind_speed": 20.0, "latitude": -37.8147, "precipitation": "0.00I", "relative_humidity": 46.4, "geo_hash": "r1mbb", "sender_id": "1"}
Message published successfully. Data: {"windspeed_knots": 7.2, "longitude": 141.088, "air_temperature_celcius": 9, "created_time": "2019-05-24 15:13:26.276021", "max_wind_speed": 9.9, "latitude": -35.957, "precipitation": "0.00I", "relative_humidity": 42.4, "geo_hash": "r1s76", "sender_id": "1"}
Message published successfully. Data: {"windspeed_knots": 16.1, "longitude": 144.17, "air_temperature_celcius": 18, "created_time": "2019-05-24 15:13:31.288187", "max_wind_speed": 21.0, "latitude": -37.863, "precipitation": "0.00G", "relative_humidity": 57.5, "geo_hash": "r1q88", "sender_id": "1"}
Message published successfully. Data: {"windspeed_knots": 7.7, "longitude": 147.17

Message published successfully. Data: {"windspeed_knots": 6.7, "longitude": 143.1189, "air_temperature_celcius": 12, "created_time": "2019-05-24 15:15:41.628271", "max_wind_speed": 11.1, "latitude": -36.4125, "precipitation": "0.00G", "relative_humidity": 47.9, "geo_hash": "r1tbb", "sender_id": "1"}
Message published successfully. Data: {"windspeed_knots": 7.8, "longitude": 143.8907, "air_temperature_celcius": 21, "created_time": "2019-05-24 15:15:46.645426", "max_wind_speed": 13.0, "latitude": -36.0459, "precipitation": "0.00I", "relative_humidity": 58.7, "geo_hash": "r1w6f", "sender_id": "1"}
Message published successfully. Data: {"windspeed_knots": 8.7, "longitude": 149.261, "air_temperature_celcius": 26, "created_time": "2019-05-24 15:15:51.667615", "max_wind_speed": 15.0, "latitude": -37.627, "precipitation": "0.00I", "relative_humidity": 53.7, "geo_hash": "r361u", "sender_id": "1"}
Message published successfully. Data: {"windspeed_knots": 9.3, "longitude": 144.3142, "air_temperat