In [1]:
from __future__ import unicode_literals
from kafka import KafkaConsumer, KafkaProducer
import json
from time import sleep
import csv

In [2]:
def publish_message(producer_instance, topic_name, key, value):
    """Send one UTF-8 encoded key/value record and report the outcome on stdout.

    Args:
        producer_instance: Object exposing ``send(topic, key=..., value=...)``
            and ``flush()``.
        topic_name: Name of the topic to publish to.
        key: Record key as text or bytes. ``None`` sends the record without a key.
        value: Record value as text or bytes.

    Returns:
        None. Any failure (encoding, sending, or delivery) is printed rather
        than raised, so this function never propagates an ``Exception``.
    """
    def _to_bytes(item):
        # Bytes pass through untouched; text is encoded as UTF-8.
        return item if isinstance(item, bytes) else item.encode('utf-8')

    try:
        key_bytes = None if key is None else _to_bytes(key)
        value_bytes = _to_bytes(value)

        future = producer_instance.send(topic_name, key=key_bytes, value=value_bytes)
        producer_instance.flush()
        # send() is asynchronous and flush() only waits for the request to
        # complete; it does not raise when delivery failed. Asking the returned
        # future for its result surfaces a delivery error here, so the success
        # message below is only printed for records that were really accepted.
        # The hasattr guard keeps producers whose send() returns nothing working.
        if hasattr(future, 'get'):
            future.get(timeout=10)
        print('Message published successfully.')
    except Exception as ex:
        print('Exception in publishing message')
        print(str(ex))


def connect_kafka_producer(bootstrap_servers=None, api_version=(0, 10)):
    """Create a ``KafkaProducer``, returning ``None`` if the connection fails.

    Args:
        bootstrap_servers: List of ``host:port`` broker addresses. Defaults to
            ``['localhost:9092']`` when omitted.
        api_version: Broker API version tuple passed to ``KafkaProducer``.

    Returns:
        The connected producer, or ``None`` when constructing it raised an
        ``Exception`` (the error text is printed in that case).
    """
    if bootstrap_servers is None:
        bootstrap_servers = ['localhost:9092']

    # Return from the try/except branches rather than from a ``finally``
    # clause: a ``return`` inside ``finally`` silently discards any in-flight
    # exception, including KeyboardInterrupt and SystemExit.
    try:
        return KafkaProducer(bootstrap_servers=bootstrap_servers, api_version=api_version)
    except Exception as ex:
        print('Exception while connecting Kafka')
        print(str(ex))
        return None

In [None]:
def load_station_coordinates(station_csv_path):
    """Build a station-id -> (lat, long) lookup from a station CSV file.

    The file must have a header row containing the columns ``id``, ``lat`` and
    ``long``. Values are kept exactly as read (strings, no numeric conversion).
    If an id appears more than once, the last row wins.

    Args:
        station_csv_path: Path of the station CSV file.

    Returns:
        dict mapping each ``id`` string to a ``(lat, long)`` tuple of strings.

    Raises:
        KeyError: If a row lacks one of the required columns.
        OSError: If the file cannot be opened.
    """
    # newline='' hands line endings to the csv module untouched, as its
    # documentation requires; otherwise line breaks embedded in quoted fields
    # are rewritten before the parser sees them.
    with open(station_csv_path, mode='r', newline='') as f:
        return {
            row['id']: (row['lat'], row['long'])
            for row in csv.DictReader(f)
        }
    
def send_trip_data_with_coords(trip_csv_path, station_csv_path, topic, delay_seconds=1):
    """Publish each trip row, enriched with station coordinates, to Kafka.

    Every data row of the trip CSV is extended with four extra fields (start
    lat, start long, end lat, end long) looked up by the station ids found in
    columns 4 and 7 (zero-based). Unknown stations contribute empty strings.
    Rows with fewer than 8 fields are skipped. The message key is the row's
    zero-based position after the header, counting skipped rows.

    Args:
        trip_csv_path: Path of the trip CSV file; its first row is a header.
        station_csv_path: Path of the station CSV file used for the lookup.
        topic: Kafka topic to publish to.
        delay_seconds: Pause after each published row. Defaults to 1 second;
            pass 0 to publish without throttling.

    Returns:
        None. If no producer could be created, a message is printed and
        nothing is sent.
    """
    # Zero-based positions of the station-id columns in a trip row.
    start_station_col = 4
    end_station_col = 7
    no_coords = ("", "")

    coord_lookup = load_station_coordinates(station_csv_path)
    producer = connect_kafka_producer()
    if producer is None:
        # Without this guard every row would fail to publish and the final
        # close() would raise AttributeError on None.
        print('Kafka producer unavailable; no trip data sent.')
        return

    try:
        # newline='' is what the csv module expects from files it reads.
        with open(trip_csv_path, mode='r', newline='') as f:
            reader = csv.reader(f)
            # Skip the header; the default avoids StopIteration on an empty file.
            next(reader, None)
            for i, row in enumerate(reader):
                if len(row) <= end_station_col:
                    continue

                start_coords = coord_lookup.get(row[start_station_col], no_coords)
                end_coords = coord_lookup.get(row[end_station_col], no_coords)

                # NOTE: fields are joined with plain commas and no CSV quoting,
                # so a field that itself contains a comma is ambiguous in the
                # published line. Kept as-is to preserve the message format.
                enriched_line = ",".join(row + list(start_coords) + list(end_coords))
                publish_message(producer, topic, str(i), enriched_line)
                print(enriched_line)
                if delay_seconds > 0:
                    sleep(delay_seconds)
    finally:
        # Release the producer even if reading or publishing raised.
        producer.close()

if __name__ == '__main__':
    # Driver: stream the local trip export, enriched with station
    # coordinates, to the 'trip_data' topic.
    topic = 'trip_data'
    trip_csv_path, station_csv_path = 'trip.csv', 'station.csv'

    send_trip_data_with_coords(
        trip_csv_path=trip_csv_path,
        station_csv_path=station_csv_path,
        topic=topic,
    )

In [None]:
# if __name__ == '__main__':
#     headers = {
#         'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
#         'Pragma': 'no-cache'
#     }
#     url =  'https://www.allrecipes.com/recipes/702/world-cuisine/asian/thai/'
#     all_recipes = get_recipes(url)
#     #print( all_recipes )
    
     
#     if len(all_recipes) > 0:
#         kafka_producer = connect_kafka_producer()
#         for count, recipe in enumerate(all_recipes, start=1):
             
#             publish_message(kafka_producer, 'raw_recipes', 'raw', recipe.strip())
#             if count > 3:
#                 break
#         if kafka_producer is not None:
#             kafka_producer.close()