# 1. Producing the data
Send one record (row by row) of Streaming_Pedestrian_December_counts_per_hour.csv to the Kafka stream every 0.5 seconds.
- Append the timestamp into the object to be sent.  
All values read from the CSV are sent in their original string format; none of them are converted to a datetime type.

In [1]:
# Code referenced and taken from Week 11 Labs
# import statements
from time import sleep
from json import dumps
from kafka import KafkaProducer
import random
import datetime as dt
import csv

# Opens the CSV file, reads it, and returns all of its rows as a list
def readCSVFile(fileName):
    """Read a CSV file and return all of its data rows.

    The first line of the file is used as the header. Every following row is
    returned as a dict keyed by those header names; values are left as the
    strings found in the file.

    Args:
        fileName: Path of the CSV file to read.

    Returns:
        A list with one dict per data row, in file order.
    """
    # newline='' hands line-ending handling to the csv module, so a quoted
    # field that contains a line break is returned exactly as written.
    with open(fileName, 'rt', newline='') as f:
        return list(csv.DictReader(f))

# publish the data to the producer instance with the provided topic name
def publish_message(producer_instance, topic_name, data):
    """Hand ``data`` to ``producer_instance.send`` for ``topic_name``.

    A confirmation line containing the data is printed once ``send`` has
    returned. Any ``Exception`` raised along the way is printed rather than
    propagated, so the function always returns ``None``.

    Args:
        producer_instance: Object exposing ``send(topic, value)``.
        topic_name: Name of the topic to send to.
        data: The value to send.
    """
    try:
        producer_instance.send(topic_name, data)
        confirmation = f'Message published successfully. Data: {data!s}'
        print(confirmation)
    except Exception as error:
        print('Exception in publishing message.')
        print(error)

# connects to the kafka producer and returns it
def connect_kafka_producer():
    """Create a KafkaProducer for the broker at localhost:9092.

    Values passed to the producer are serialized with ``json.dumps`` and
    encoded as ASCII bytes.

    Returns:
        The KafkaProducer instance, or None if creating it raised an
        Exception (the error is printed in that case).
    """
    # Return from the try/except branches rather than from a ``finally``
    # clause: a ``return`` inside ``finally`` would also discard
    # KeyboardInterrupt and SystemExit raised while connecting.
    try:
        return KafkaProducer(bootstrap_servers=['localhost:9092'],
                             value_serializer=lambda x: dumps(x).encode('ascii'),
                             api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka.')
        print(str(ex))
        return None
    
if __name__ == '__main__':
    # Stream the pedestrian-count CSV to Kafka, one row every 0.5 seconds.
    topic = 'pedestrian_count'
    list_ped_count = readCSVFile('Streaming_Pedestrian_December_counts_per_hour.csv')

    print('Publishing records..')
    producer = connect_kafka_producer()

    if producer is None:
        # Without a producer every publish attempt would fail, so skip the
        # loop instead of printing an error and sleeping once per row.
        print('No Kafka producer available; nothing was published.')
    else:
        try:
            for current_obj in list_ped_count:
                # Copy the row and add the current Unix time (whole seconds)
                # under the 'Timestamp' key; the CSV values stay as strings.
                data = dict(current_obj,
                            Timestamp=int(dt.datetime.now().timestamp()))

                publish_message(producer, topic, data)

                sleep(0.5)     # sleep for 0.5 seconds
        finally:
            # send() only queues a record, so flush what is still buffered
            # and close the producer even if the loop is interrupted.
            producer.flush()
            producer.close()

Publishing records..
Message published successfully. Data: {'ID': '3435630', 'Date_Time': '12/01/2020 08:00:00 AM', 'Year': '2020', 'Month': 'December', 'Mdate': '1', 'Day': 'Tuesday', 'Time': '8', 'Sensor_ID': '39', 'Sensor_Name': 'Alfred Place', 'Hourly_Counts': '83', 'Timestamp': 1622017507}
Message published successfully. Data: {'ID': '3435798', 'Date_Time': '12/01/2020 11:00:00 AM', 'Year': '2020', 'Month': 'December', 'Mdate': '1', 'Day': 'Tuesday', 'Time': '11', 'Sensor_ID': '12', 'Sensor_Name': 'New Quay', 'Hourly_Counts': '86', 'Timestamp': 1622017508}
Message published successfully. Data: {'ID': '3435107', 'Date_Time': '12/01/2020 12:00:00 AM', 'Year': '2020', 'Month': 'December', 'Mdate': '1', 'Day': 'Tuesday', 'Time': '0', 'Sensor_ID': '4', 'Sensor_Name': 'Town Hall (West)', 'Hourly_Counts': '78', 'Timestamp': 1622017509}
Message published successfully. Data: {'ID': '3435108', 'Date_Time': '12/01/2020 12:00:00 AM', 'Year': '2020', 'Month': 'December', 'Mdate': '1', 'Day': '

KeyboardInterrupt: 