# Big Data - Dashboard
## ALBICHARI Kaïs - D'HOSE Tanguy - ULB

In [1]:
from pyspark import SparkContext, SparkConf
from pyspark.streaming import StreamingContext
import sys
import os
from datetime import datetime
from pathlib import Path
from kafka import KafkaConsumer
from kafka.errors import KafkaError

In [2]:
# Peek at the raw dataset before wiring up the stream. Judging from the cell
# output recorded below, head() hands back the first 5 lines of the file as a
# list of strings (TODO confirm against the headtail helper). Each line is
# space-separated: "<date> <time> <p-i> <measurement> <voltage>".
import headtail
headtail.head('data/data.conv.txt', 5)

['2017-03-31 03:38:16.508 1-0 122.153 2.03397\n',
 '2017-03-31 03:38:15.967 1-1 -3.91901 2.09397\n',
 '2017-03-31 03:38:16.577 1-2 11.04 2.07397\n',
 '2017-02-28 00:59:16.359 1-0 19.9884 2.74964\n',
 '2017-02-28 00:59:16.803 1-1 37.0933 2.76964\n']

In [16]:
# Municipality codes, stored as strings because they are used as the prefix of
# the Kafka topic names ("<code>-<sensor_type>").
MUNICIPALITIES = [
    "1000", "1030", "1040", "1050", "1060",
    "1070", "1080", "1081", "1082", "1083",
    "1090", "1140", "1150", "1160", "1170",
    "1180", "1190", "1200", "1210",
]


In [3]:
# Spark context running in local mode ("local[*]") under the app name "test".
sc = SparkContext(master="local[*]", appName="test")
# Restrict Spark's own logging to warnings and errors so they stay visible.
sc.setLogLevel("WARN")

# Streaming context on top of it: one mini-batch is produced every 5 seconds.
ssc = StreamingContext(sparkContext=sc, batchDuration=5)

In [34]:
def parseRow(line):
    """Parse one consumed record into a single-element list holding a dict.

    line: a record exposing ``topic`` and a bytes ``value`` whose decoded text
          is "<date> <time> <p-i> <measurement> <voltage>", separated by
          whitespace.

    Returns ``[row]`` where ``row`` has the keys "topic", "time" (datetime),
    "p-i" (str), "measurement" (float) and "voltage" (float). If the record
    cannot be decoded or parsed, the error is printed and an empty list is
    returned, so the result is always a list.
    """
    try:
        # Decode inside the try so an undecodable payload is reported like any
        # other malformed row instead of escaping as an exception.
        # split() without an argument tolerates repeated spaces and the
        # trailing newline that the raw data lines carry.
        values = line.value.decode().split()
        return [{"topic": line.topic,
                 "time": datetime.strptime(values[0] + " " + values[1], "%Y-%m-%d %H:%M:%S.%f"),
                 "p-i": values[2],
                 "measurement": float(values[3]),
                 "voltage": float(values[4])}]
    except Exception as err:
        # Best-effort parsing: report the bad row and carry on.
        print("Unexpected error: %s" % (err))
        return []


def checkAttributes(sensor_type, space_tag, time_tag):
    """Validate the three dashboard selectors.

    sensor_type must lie in [0,3], space_tag in [0,2] and time_tag in [0,4].
    The selectors are checked in that order; for the first one that is out of
    range an explanatory message is printed and False is returned. Returns
    True when all three are within range.
    """
    if sensor_type < 0 or sensor_type > 3:
        print("Sensor type value is not supported. Give a value in [0,3]")
        return False

    if space_tag < 0 or space_tag > 2:
        space_help = (
            "Space tag value is not supported. Give a value in [0,2].",
            "0: grouped per space",
            "1: grouped per municipality",
            "2: grouped for the entirety of Brussels",
        )
        for message in space_help:
            print(message)
        return False

    if time_tag < 0 or time_tag > 4:
        time_help = (
            "Time tag value is not supported. Give a value in [0,4].",
            "0: last 24h",
            "1: last 2 days",
            "2: last week",
            "3: last month",
            "4: last year",
        )
        for message in time_help:
            print(message)
        return False

    return True
        

def basicStats(sensor_type, space_tag, time_tag):
    """
    Entry point for the basic statistics (min, max, avg) view of sensor readings.

    sensor_type: type of the sensor (0 to 3)
    space_tag: defines granularity in space (0 to 2)
    time_tag: defines granularity in time (0 to 4)

    Current behaviour: after validating the three selectors, subscribes to the
    "<municipality>-<sensor_type>" topic of every municipality, then reads and
    prints 5 parsed records. space_tag and time_tag are only validated here;
    no grouping or aggregation is performed yet.

    Returns None. Nothing is consumed when the selectors are invalid. Errors
    raised while connecting or consuming are printed, not propagated.
    """
    if not checkAttributes(sensor_type, space_tag, time_tag):
        return

    # Bind the name before the try: if the KafkaConsumer constructor itself
    # fails (e.g. broker unreachable), the finally clause must not trip over an
    # unbound variable and replace the reported error with an UnboundLocalError.
    consumer = None
    try:
        consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'])
        # One topic per municipality for the requested sensor type.
        topics = [municipality + "-" + str(sensor_type) for municipality in MUNICIPALITIES]
        print(topics)
        consumer.subscribe(topics)
        # Sample a handful of records; next() waits for the next available one.
        for _ in range(5):
            row = next(consumer)
            print(parseRow(row))
    except Exception as err:
        print("Unexpected error: %s" % (err))
    finally:
        if consumer is not None:
            consumer.close()



# Sensor type 1, grouped for the entirety of Brussels (2), over the last 24h (0).
basicStats(sensor_type=1, space_tag=2, time_tag=0)

2017-02-28 00:50:48.907000
['1000-1', '1030-1', '1040-1', '1050-1', '1060-1', '1070-1', '1080-1', '1081-1', '1082-1', '1083-1', '1090-1', '1140-1', '1150-1', '1160-1', '1170-1', '1180-1', '1190-1', '1200-1', '1210-1']
[{'topic': '1082-1', 'time': datetime.datetime(2017, 2, 28, 0, 54, 21, 54000), 'p-i': '24-1', 'measurement': 42.3505, 'voltage': 2.69532}]
[{'topic': '1190-1', 'time': datetime.datetime(2017, 2, 28, 0, 54, 20, 430000), 'p-i': '48-1', 'measurement': 42.0489, 'voltage': 2.70532}]
[{'topic': '1190-1', 'time': datetime.datetime(2017, 2, 28, 0, 54, 20, 675000), 'p-i': '1-1', 'measurement': 39.2123, 'voltage': 2.77742}]
[{'topic': '1030-1', 'time': datetime.datetime(2017, 2, 28, 0, 54, 21, 558000), 'p-i': '35-1', 'measurement': 41.2425, 'voltage': 2.64639}]
[{'topic': '1190-1', 'time': datetime.datetime(2017, 2, 28, 0, 54, 21, 815000), 'p-i': '3-1', 'measurement': 39.0082, 'voltage': 2.72532}]
