# FIT3182 - Assignment 3
---
## Part B - Producer 1

- Filename: Assignment_PartB_Producer1.ipynb
- Student Name: Deeksha Sridhar
- Student ID: 32187998

In [17]:
import pymongo
from pymongo import MongoClient
from pprint import pprint
import pandas as pd
from datetime import datetime, timedelta
from time import sleep
from json import dumps
from kafka3 import KafkaProducer
import random


def read_climate_streaming():
    """
    Read the climate streaming data from the CSV file and preprocess it.
    Returns:
        list: List of dictionaries containing the preprocessed climate data.
    """
    climate_streaming = pd.read_csv('climate_streaming.csv')
    climate_streaming.rename(columns={'precipitation ': 'precipitation'}, inplace=True)
    climate_streaming['precipitation_flag'] = climate_streaming['precipitation'].str[-1]
    climate_streaming['precipitation'] = climate_streaming['precipitation'].str[0:-1]

    data = []
    for index, climate_row in climate_streaming.iterrows():
        document = {}
        document['latitude'] = float(climate_row['latitude'])
        document['longitude'] = float(climate_row['longitude'])
        document['air_temperature_celcius'] = int(climate_row['air_temperature_celcius'])
        document['relative_humidity'] = float(climate_row['relative_humidity'])
        document['windspeed_knots'] = float(climate_row['windspeed_knots'])
        document['max_wind_speed'] = float(climate_row['max_wind_speed'])
        document['precipitation'] = float(climate_row['precipitation'].strip())  # Remove leading and trailing spaces
        document['precipitation_flag'] = climate_row['precipitation_flag'].strip()
        document['GHI_w/m2'] = int(climate_row['GHI_w/m2'])
        data.append(document)

    return data


def get_latest_date():
    """
    Get the latest date from the collection.
    Returns:
        datetime: The latest date in the collection or current datetime if collection is empty.
    """
    latest_date = collection.aggregate([
        {"$sort": {"date": -1}},
        {"$project": {"_id": 0, "date": 1}},
        {"$limit": 1}
    ])
    for document in latest_date:
        latest_date = document['date']
        break
    else:
        latest_date = datetime.now()

    return latest_date


def publish_message(producer_instance, topic_name, data):
    """
    Publishes a message to the Kafka topic.
    Args:
        producer_instance (KafkaProducer): The Kafka producer instance.
        topic_name (str): Name of the Kafka topic.
        data (dict): Data to be published.
    """
    try:
        producer_instance.send(topic_name, value=data)
        print('Message published successfully. ' + str(data))
    except Exception as ex:
        print('Exception in publishing message.')
        print(str(ex))


def connect_kafka_producer():
    """
    Connects to the Kafka server and creates a Kafka producer instance.
    Returns:
        KafkaProducer: The Kafka producer instance.
    """
    _producer = None
    try:
        _producer = KafkaProducer(bootstrap_servers=['192.168.86.244:9092'],
                                  value_serializer=lambda x: dumps(x).encode('ascii'),
                                  api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting to Kafka.')
        print(str(ex))
    finally:
        return _producer


if __name__ == '__main__':
    topic = 'streaming'
    producer = connect_kafka_producer()
    data = read_climate_streaming()
    latest_date = get_latest_date() + timedelta(days=1)
    days_passed = 0

    while True:
        chosen_data = random.choice(data)
        curr_date = latest_date + timedelta(days=days_passed)
        chosen_data['producer'] = "climate_streaming"
        chosen_data["created_date"] = curr_date.strftime("%d/%m/%Y")
        publish_message(producer, topic, chosen_data)
        days_passed += 1
        sleep(10)


Message published successfully. {'latitude': -36.358, 'longitude': 143.113, 'air_temperature_celcius': 21, 'relative_humidity': 58.8, 'windspeed_knots': 8.8, 'max_wind_speed': 22.9, 'precipitation': 0.08, 'precipitation_flag': 'G', 'GHI_w/m2': 167, 'producer': 'climate_streaming', 'created_date': '02/01/2023'}
Message published successfully. {'latitude': -37.368, 'longitude': 148.05, 'air_temperature_celcius': 10, 'relative_humidity': 41.4, 'windspeed_knots': 9.4, 'max_wind_speed': 14.0, 'precipitation': 0.0, 'precipitation_flag': 'I', 'GHI_w/m2': 92, 'producer': 'climate_streaming', 'created_date': '03/01/2023'}
Message published successfully. {'latitude': -34.289, 'longitude': 141.712, 'air_temperature_celcius': 32, 'relative_humidity': 54.1, 'windspeed_knots': 12.8, 'max_wind_speed': 19.0, 'precipitation': 0.0, 'precipitation_flag': 'I', 'GHI_w/m2': 265, 'producer': 'climate_streaming', 'created_date': '04/01/2023'}
Message published successfully. {'latitude': -37.461, 'longitude': 

Message published successfully. {'latitude': -37.382, 'longitude': 149.341, 'air_temperature_celcius': 18, 'relative_humidity': 53.6, 'windspeed_knots': 7.2, 'max_wind_speed': 15.0, 'precipitation': 0.0, 'precipitation_flag': 'I', 'GHI_w/m2': 150, 'producer': 'climate_streaming', 'created_date': '29/01/2023'}
Message published successfully. {'latitude': -35.325, 'longitude': 143.497, 'air_temperature_celcius': 13, 'relative_humidity': 48.5, 'windspeed_knots': 5.3, 'max_wind_speed': 11.1, 'precipitation': 0.08, 'precipitation_flag': 'G', 'GHI_w/m2': 113, 'producer': 'climate_streaming', 'created_date': '30/01/2023'}
Message published successfully. {'latitude': -37.479, 'longitude': 143.358, 'air_temperature_celcius': 23, 'relative_humidity': 60.6, 'windspeed_knots': 10.1, 'max_wind_speed': 26.0, 'precipitation': 0.0, 'precipitation_flag': 'I', 'GHI_w/m2': 180, 'producer': 'climate_streaming', 'created_date': '31/01/2023'}
Message published successfully. {'latitude': -37.434, 'longitude'

KeyboardInterrupt: 