## DATA VISUALISATION - Static and Streaming <hr />

The Data Visualisation part deals with extracting useful insights from the data. This is essential when the volume of data is enormous, because aggregating the data and presenting it in a human-readable, understandable format becomes crucial. The Data Visualisation is divided into 2 parts:
<ol>
        <li><b>Streaming Data Visualisation</b> : This part of the visualisation plots the climate data as and when it is streamed, and highlights insights such as the highest and lowest air temperatures in the streamed data.</li>
    <li><b>Static Data Visualisation</b> : This part of the visualisation is responsible for plotting and extracting insights from the data already stored in the database. This task involves querying the database and plotting visualisations. The first plot shows the records with the top 10 numbers of fires with respect to time; the second plots the occurrences of the fires on a map, using the latitude and longitude information from the data partitions stored in the database. We also provide additional information in terms of air temperature, relative humidity, surface temperature and confidence. These are shown when hovering over the tagged location markers on the map.</li>
</ol>

### 1. Streaming Visualisation <hr />

In [None]:
# import libraries
from time import sleep
from kafka import KafkaConsumer
import datetime as dt
import matplotlib.pyplot as plt
import ast

%matplotlib notebook

# Kafka topic the visualiser subscribes to; the consumer created below reads the messages streamed to this topic.
topic = 'test'

# function to annotate maximum point in the streamed partition of data
def annotate_max(x, y, ax = None):
    """Annotate the largest value of ``y`` on a plot.

    x  -- sequence of x positions, parallel to ``y``
    y  -- list of plotted values; the first occurrence of the maximum is marked
    ax -- axes to annotate; when omitted (or falsy) the current pyplot axes are used

    Raises ValueError if ``y`` is empty.
    """
    peak_value = max(y)
    peak_x = x[y.index(peak_value)]
    label = 'Max: Time={}, Value={}'.format(peak_x, peak_value)
    target_axes = ax if ax else plt.gca()
    # put the label 5 units above the point and link the two with a red arrow
    target_axes.annotate(
        label,
        xy=(peak_x, peak_value),
        xytext=(peak_x, peak_value + 5),
        arrowprops={'facecolor': 'red', 'shrink': 0.05},
    )
    
# function to annotate the minimum point in the streamed partition of data
def annotate_min(x, y, ax = None):
    """Annotate the smallest value of ``y`` on a plot.

    x  -- sequence of x positions, parallel to ``y``
    y  -- list of plotted values; the first occurrence of the minimum is marked
    ax -- axes to annotate; when omitted (or falsy) the current pyplot axes are used

    Raises ValueError if ``y`` is empty.
    """
    trough_value = min(y)
    trough_x = x[y.index(trough_value)]
    label = 'Min: Time={}, Value={}'.format(trough_x, trough_value)
    target_axes = ax if ax else plt.gca()
    # put the label 5 units above the point and link the two with an orange arrow
    target_axes.annotate(
        label,
        xy=(trough_x, trough_value),
        xytext=(trough_x, trough_value + 5),
        arrowprops={'facecolor': 'orange', 'shrink': 0.05},
    )

# function to connect to the kafka server instance and return the instance of the kafka consumer. this instance will be
# passed to the consume_messages function, which parses the messages and passes the parsed values to the visualiser
def connect_kafka_consumer():
    """Create a Kafka consumer subscribed to the module-level ``topic``.

    Returns the KafkaConsumer instance, or None when creating it raises an
    Exception (the error is printed instead of being propagated).
    """
    _consumer = None
    try:
        _consumer = KafkaConsumer(topic,
                                  consumer_timeout_ms=10000, # stop iteration if no message after 10 sec
                                  # auto_offset_reset='earliest', # uncomment to start from the earliest available message
                                  bootstrap_servers=['localhost:9092'],
                                  api_version=(0, 10))
    except Exception as ex:
        print('Exception while connecting Kafka')
        print(str(ex))
    # Return after the try/except rather than from a ``finally`` clause: a
    # ``return`` inside ``finally`` also discards KeyboardInterrupt and
    # SystemExit, which must be allowed to propagate.
    return _consumer

# function to create a container with specified shape and size. this container will be used to plot various data points 
# and showcase points of interest.
def init_plots():
    """Create, label and display the figure used for the streaming plot.

    Returns a ``(fig, ax2)`` tuple. If any step raises, the error is printed
    and the function falls through, implicitly returning None.
    """
    try:
        figure = plt.figure(figsize=(9.5, 6))  # width, height
        axes = figure.add_subplot(111)  # one subplot occupying the whole grid
        axes.set_xlabel('Time')
        axes.set_ylabel('Air temperature')
        axes.title.set_text('Arrival Time Vs Air temperature')
        figure.suptitle('Real-time uniform stream data visualization with interesting points')
        figure.show()  # display the figure
        figure.canvas.draw()  # render it on the canvas
        return figure, axes
    except Exception as error:
        print(str(error))
    
# function to consume the messages received from the producer on the specified stream.
def consume_messages(consumer, fig, ax2):
    """Plot air temperatures sent by climate producer 1 as messages arrive.

    consumer -- iterable of messages whose ``value`` is a UTF-8 encoded
                Python-literal dict
    fig      -- figure whose canvas is redrawn after every update
    ax2      -- axes that are cleared and re-plotted for every update

    All figures are closed once the consumer stops yielding messages. Any
    exception raised along the way is printed and ends the function.
    """
    try:
        # buffered arrival times (x) and air temperatures (y)
        arrival_times = []
        temperatures = []
        for message in consumer:
            # Every producer publishes to the same topic, so each payload is
            # parsed into a dict and anything not sent by climate producer 1
            # is skipped.
            record = ast.literal_eval(message.value.decode('utf-8'))
            if record["sender_id"] != "climate_producer_1":
                continue

            reading = record["air_temperature_celcius"]
            arrival_times.append(dt.datetime.now().strftime("%X"))
            temperatures.append(reading)

            # nothing is drawn until more than 5 readings are buffered
            if len(temperatures) <= 5:
                continue

            # redraw the line plot for the buffered readings
            ax2.clear()
            ax2.plot(arrival_times, temperatures, color="green")
            ax2.set_xlabel('Arrival Time')
            ax2.set_ylabel('Air temperature')
            ax2.set_title('Arrival Time Vs Value')
            ax2.set_ylim(0,100)
            ax2.set_yticks([0,20,40,60,80,100])

            # mark the highest and lowest buffered readings
            annotate_max(arrival_times, temperatures, ax2)
            annotate_min(arrival_times, temperatures, ax2)

            fig.canvas.draw()

            # drop the oldest reading so the plot keeps moving
            del arrival_times[0]
            del temperatures[0]
        plt.close('all')
    except Exception as error:
        print(str(error))
    
if __name__ == '__main__':
    
    # Get a Kafka consumer instance (None if the connection attempt failed)
    consumer = connect_kafka_consumer()
    
    # Create the figure and axes that the streaming plot is drawn on
    fig, ax2 = init_plots()
    
    # Consume the stream and keep redrawing the plot on that figure's canvas
    consume_messages(consumer, fig, ax2)
    
    

### 2. Static Visualisation <hr />

In [None]:
# Import all the essential libraries. 
import matplotlib.pyplot as pyplt
import pandas as pd
import numpy as np
import datetime
import webbrowser
from pymongo import MongoClient
import gmplot
# Create the gmplot map object that the fire markers are added to. The map is centred on the given
# coordinates; per gmplot's GoogleMapPlotter(lat, lng, zoom) signature the third argument (15) is the zoom level.
gmap = gmplot.GoogleMapPlotter(-37.812015244225677, 144.951471202974, 15) # latitude and longitude of melbourne city

#### 2.1 Top 10 Fires - Records with the top 10 number of fires. Plot a bar chart with time as the x-axis and number of fires as the y-axis.

In [None]:
# connect to MongoDB using the client's default host and port.
client = MongoClient()

# database holding the assignment data.
db = client.assignment2

# collections queried by the static visualisations.
fire = db.fire
climate = db.climate

# NOTE(review): not referenced by any of the cells visible here - confirm before removing.
unoccupiedList = []

# group the fire records by datetime, count the fires in each group and keep
# the 10 datetimes with the highest counts.
top_fires_pipeline = [
    {"$group": {"_id": "$datetime", "count": {"$sum": 1}}},
    {"$sort": {"count": -1}},
    {"$limit": 10},
]
results = fire.aggregate(top_fires_pipeline)

# materialise the cursor so the records can be traversed more than once.
res = list(results)

# plot data: the time-of-day part of each datetime and the matching fire count.
time = [str(entry["_id"].time()) for entry in res]
no_of_fire = [int(entry["count"]) for entry in res]

In [None]:
# use matplotlib.pyplot to plot the top 10 fires as a bar chart.
# The figure has to be created (and sized) before anything is drawn: calling
# pyplt.figure() after pyplt.bar() opens a second, empty figure and leaves the
# bar chart at the default size.
pyplt.rcParams["figure.figsize"] = [16,9] # default size for figures created later without an explicit figsize
pyplt.figure(figsize=[10,10]) # explicit size for this chart
index = np.arange(len(time)) # one bar position per record
pyplt.bar(index,no_of_fire)
pyplt.xlabel('Time',fontsize= 10)
pyplt.ylabel('Number of Fires',fontsize=10)
pyplt.xticks(index,time,fontsize=10,rotation=30) # label each bar with its time
pyplt.title('Top 10 Number of fires',fontsize=20)
pyplt.show()

#### 2.2 Plot fire locations in the map with air temperature, surface temperature, relative humidity and confidence. See the example below.

In [None]:
# latitude and longitude of every plotted fire, collected for the scatter plot.
lat = []
lon = []

# join every fire document with the climate documents that share its "date",
# keep only the fields needed for the map, and unwind the joined array so each
# result carries a single climate sub-document (fires without a matching
# climate document are dropped by $unwind).
res = fire.aggregate([{
    "$lookup":
    {
        "from": "climate",
        "localField": "date",
        "foreignField": "date",
        "as":"climate"
    }},
    {"$project":{"surface_temperature_celcius":1,"confidence":1,"latitude":1,"longitude":1,"climate" : 
               { "air_temperature_celcius":1,"relative_humidity":1}}},
    {"$unwind": "$climate"}
    ])

# materialise the cursor into a list.
res = list(res)


# add one titled marker per joined record; the title is the text shown for the marker.
for row in res:
    title_str = "relative humidity:{}, surf-temp:{}, air-temp:{}, confidence:{}".format(
        row["climate"]["relative_humidity"],
        row["surface_temperature_celcius"],
        row["climate"]["air_temperature_celcius"],
        row["confidence"])
    # convert the coordinates once so the marker and the scatter lists use the
    # same numeric values (previously the marker received the raw field values).
    latitude = float(row["latitude"])
    longitude = float(row["longitude"])
    gmap.marker(latitude, longitude, title = title_str)
    lat.append(latitude)
    lon.append(longitude)


In [None]:
# Plotting the collected fire coordinates on the map in orange-red (#FF4500).
# NOTE(review): with marker=True gmplot appears to draw pin markers, so `size` may have no effect, and these
# pins sit on the same coordinates as the titled markers added with gmap.marker() - confirm this is intended.
gmap.scatter(lat,lon, '#FF4500', size=10, marker=True)

In [None]:
from pathlib import Path

# Drawing the map: write it out as an HTML file (relative to the current working directory).
gmap.draw("Fire_location.html")

# opens the html document in a new browser window. The browser is given an
# absolute file:// URI because passing a bare filename to webbrowser is
# neither supported nor portable across platforms.
webbrowser.open_new(Path("Fire_location.html").resolve().as_uri())