# FIT5148 - Distributed Databases and Big Data

# Group Assignment - Part C Visualisation of the data using Matplotlib and Folium

**Your Details:**
- Name: Roopak Thiyyathuparambil Jayachandran
- StudentID: 29567467
- Email: rthi0002@student.monash.edu

## Introduction 

This portion of the assignment visualises the data stored in MongoDB during real-time streaming.

In [6]:
# Importing libraries
from pymongo import MongoClient
import pandas as pd
import folium
import pandas
from time import sleep
from kafka import KafkaConsumer
import datetime as dt
import matplotlib.pyplot as plt
%matplotlib notebook

For the incoming climate data, plot a line graph of air temperature against arrival time. You need to label some interesting points such as maximum and minimum values.
For this task we will use a Kafka consumer to read the data from the stream and plot it using matplotlib in real time.

In [14]:
topic = 'Climate'


# Kafka consumer for the climate topic
def connect_kafka_consumer():
    """Create a Kafka consumer subscribed to ``topic``.

    Returns:
        The ``KafkaConsumer`` instance, or ``None`` when creating it raised
        an ``Exception`` (the error is printed rather than re-raised).
    """
    # The return statements live outside any ``finally`` clause on purpose:
    # returning from ``finally`` would silently discard in-flight exceptions
    # such as KeyboardInterrupt or SystemExit.
    try:
        return KafkaConsumer(
            topic,
            consumer_timeout_ms=10000,  # stop iteration if no message after 10 sec
            auto_offset_reset='earliest',  # comment this if you don't want to consume earliest available message
            bootstrap_servers=['127.0.0.1:9092'],
            api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka')
        print(str(ex))
        return None

# Empty figure shown before any data arrives from the stream
def init_plots():
    """Create and display the empty air-temperature figure.

    Returns:
        A ``(figure, axes)`` tuple on success. If anything raises while the
        figure is being built, the error is printed and ``None`` is returned
        implicitly.
    """
    try:
        figure = plt.figure(figsize=(9.5, 6))  # width, height
        axes = figure.add_subplot(111)  # single subplot filling the grid
        figure.suptitle('Real-time uniform stream data visualization')
        axes.set_xlabel('Time')
        axes.set_ylabel('Air Temperature')
        axes.set_ylim(0, 40)
        axes.set_yticks([0, 10, 20, 30, 40])
        figure.show()  # display the figure
        figure.canvas.draw()  # render the initial, empty canvas
    except Exception as err:
        print(str(err))
    else:
        return figure, axes
    
def _draw_window(fig, ax, labels, values):
    """Redraw ``ax`` with the current window and label its max and min."""
    # Plot against integer positions and pin the ticks explicitly; plotting
    # the label strings directly would merge duplicate timestamps into one
    # category and leave the tick labels out of step with the tick positions.
    positions = list(range(len(values)))
    ax.clear()
    ax.plot(positions, values)
    ax.set_xlabel('Time')
    ax.set_ylabel('Air Temperature')
    ax.set_xticks(positions)
    ax.set_xticklabels(labels, rotation=45)
    ax.set_ylim(0, 40)
    ax.set_yticks([0, 10, 20, 30, 40])

    # Label the extremes of the visible window.
    hi = max(positions, key=values.__getitem__)
    lo = min(positions, key=values.__getitem__)
    ax.annotate('Max: {}'.format(values[hi]), xy=(hi, values[hi]),
                xytext=(0, 18), textcoords='offset points', ha='center',
                arrowprops=dict(arrowstyle='->'))
    ax.annotate('Min: {}'.format(values[lo]), xy=(lo, values[lo]),
                xytext=(0, -24), textcoords='offset points', ha='center',
                arrowprops=dict(arrowstyle='->'))
    fig.canvas.draw()


def consume_messages(consumer, fig, ax):
    """Plot a sliding window of air temperatures read from ``consumer``.

    Each message value is decoded as UTF-8 and split on commas. From the
    indexing below, the last field is read as the producer id, the
    second-to-last as a "<date> <time>" timestamp and the third field as an
    integer air temperature (layout inferred from this code only; confirm
    against the producer).

    Args:
        consumer: Iterable of messages exposing a bytes ``value`` attribute,
            or ``None`` when the connection failed.
        fig: Figure whose canvas is redrawn on every update.
        ax: Axes that the line is drawn on.

    Returns:
        None. Errors are printed rather than raised.
    """
    if consumer is None:
        print('No Kafka consumer available; nothing to plot')
        return
    try:
        # containers for x (time labels) and y (temperatures)
        x, y = [], []
        for message in consumer:
            try:
                data = message.value.decode('utf-8').split(',')
                if int(data[-1]) != 1:  # Only for data from producer 1
                    continue
                # keep the time part and drop its last five characters
                label = data[-2].split(" ")[1][:-5]
                temperature = int(data[2])
            except (ValueError, IndexError) as ex:
                # one malformed record should not end the whole stream
                print('Skipping malformed message: ' + str(ex))
                continue
            x.append(label)
            y.append(temperature)
            # we start plotting only when we have more than 10 data points
            if len(y) > 10:
                _draw_window(fig, ax, x, y)
                x.pop(0)  # slide the window: drop the oldest point
                y.pop(0)
        plt.close('all')
    except Exception as ex:
        print(str(ex))
    
if __name__ == '__main__':
    # Connect, build the empty figure, then stream readings into it.
    consumer = connect_kafka_consumer()
    if consumer is None:
        # connect_kafka_consumer() has already printed the reason
        print('Could not connect to Kafka; nothing to plot')
    else:
        try:
            plots = init_plots()
            # init_plots() returns None when the figure could not be built,
            # so only unpack and start consuming when it succeeded
            if plots is not None:
                fig, ax = plots
                consume_messages(consumer, fig, ax)
        finally:
            # release the broker connection once streaming has finished
            consumer.close()
    
    

<IPython.core.display.Javascript object>

Records with the top 10 number of fires. Plot a bar chart with time as the x-axis and number of fires as the y-axis.

In [2]:
# Connect to MongoDB (driver defaults) and select the working collection
client = MongoClient()
db = client['fit5148']
col = db['climate_data_model_4']  # Change the collection accordingly

In [43]:
# Retrieving the 10 records with the highest number of fires from the
# mongodb collection
result = col.aggregate([
    {
        "$project": {
            "date": 1,
            "air_temperature": 1,
            "relative_humidity": 1,
            # $size rejects a missing or null field, so a record without a
            # `historic` array is counted as having zero fires
            "No Of Fire": {"$size": {"$ifNull": ["$historic", []]}}
        }
    },
    # rank and trim on the server instead of pulling every record into
    # Python just to keep ten of them
    {"$sort": {"No Of Fire": -1}},
    {"$limit": 10}
])

# fire holds the top records, most fires first
fire = list(result)

date = [each['date'] for each in fire]
hotspot = [each['No Of Fire'] for each in fire]

In [46]:
# Bar chart of the number of fires (y) against record date (x)
plt.figure(figsize=(9.5, 6))
plt.title("Most frequent fire incidents")
plt.xlabel("Date")
plt.ylabel("Frequency of Fire")
plt.bar(x=date, height=hotspot)
plt.xticks(rotation=45)  # applied after the bars so the tick labels exist
plt.show()

<IPython.core.display.Javascript object>

Plot fire locations in the map with air temperature, surface temperature, relative humidity and confidence. See the example below.

In [21]:
# Retrieve some data from the collection, here using folium for plotting data in world map
cursor = col.aggregate([
    {"$unwind": "$historic"},
    # keep hotspots whose confidence is strictly between 80 and 100
    {"$match": {"historic.confidence": {"$gt": 80, "$lt": 100}}},
    # Just taking 30 records for plotting (limited on the server, so exactly
    # 30 documents come back rather than the 31 a `count > 30` guard admits)
    {"$limit": 30},
    {"$project": {"_id": 0, "historic.datetime": 1, "air_temperature": 1, "relative_humidity": 1,
                  "historic.latitude": 1, "historic.longitude": 1,
                  "historic.surface_temperature_celsius": 1, "historic.confidence": 1}}
])

lat_list = []
lon_list = []
air_list = []
surface_list = []
confidence_list = []
relative_list = []

for each in cursor:
    # after $unwind, `historic` is a single hotspot sub-document
    historic = each['historic']
    air_list.append(each['air_temperature'])
    relative_list.append(each['relative_humidity'])
    lat_list.append(historic['latitude'])
    lon_list.append(historic['longitude'])
    surface_list.append(historic['surface_temperature_celsius'])
    confidence_list.append(historic['confidence'])

data = pd.DataFrame({
    'lat': lat_list,
    'lon': lon_list,
    'air_temperature': air_list,
    'surface_temperature': surface_list,
    'confidence': confidence_list,
    'relative_humidity': relative_list
})

# NOTE(review): the "Mapbox Bright" tile set may not be available in newer
# folium releases - confirm against the installed version
m = folium.Map(location=[20, 0], tiles="Mapbox Bright", zoom_start=4)

# itertuples keeps each column's own dtype; per-row iloc lookups would
# upcast mixed numeric columns before they are formatted
for row in data.itertuples(index=False):
    # tooltip text shown when hovering over a marker
    tip = ("Surface Temperature : {}, Air Temperature : {}, "
           "Confidence : {}, Relative Humidity : {}").format(
               row.surface_temperature, row.air_temperature,
               row.confidence, row.relative_humidity)
    folium.Marker([row.lat, row.lon], tooltip=tip).add_to(m)
m


In [29]:
# Also using gmplot to Visualise the same
import gmplot

cursor = col.aggregate([
    {"$unwind": "$historic"},
    # keep hotspots whose confidence is strictly between 80 and 100
    {"$match": {"historic.confidence": {"$gt": 80, "$lt": 100}}},
    # NOTE(review): the previous `count > 10` guard let 11 records through;
    # the limit is kept at 11 to preserve the output - confirm the intended
    # sample size
    {"$limit": 11},
    {"$project": {"_id": 0, "historic.datetime": 1, "air_temperature": 1, "relative_humidity": 1,
                  "historic.latitude": 1, "historic.longitude": 1,
                  "historic.surface_temperature_celsius": 1, "historic.confidence": 1}}
])

lat_list = []
lon_list = []
air_list = []
surface_list = []
confidence_list = []
relative_list = []

for each in cursor:
    # after $unwind, `historic` is a single hotspot sub-document
    historic = each['historic']
    air_list.append(each['air_temperature'])
    relative_list.append(each['relative_humidity'])
    lat_list.append(historic['latitude'])
    lon_list.append(historic['longitude'])
    surface_list.append(historic['surface_temperature_celsius'])
    confidence_list.append(historic['confidence'])

if lat_list:
    # centre the map on the first hotspot
    gmap = gmplot.GoogleMapPlotter(lat_list[0], lon_list[0], 7)
    gmap.scatter(lat_list, lon_list, '#ff4500', size=2, marker=False)
    gmap.draw("mymap.html")
else:
    # indexing [0] on an empty result would raise IndexError
    print('No hotspots matched the query; mymap.html was not written')