# Big Data Dashboard

In [1]:
import seaborn as sns
from ipywidgets import interact

# Apply seaborn's "darkgrid" style to the plots drawn in this notebook.
sns.set(style="darkgrid")

In [2]:
# Load seaborn's "tips" example dataset; it feeds the interactive pair plot below.
tips = sns.load_dataset('tips')

In [3]:
# Preview the first rows to see which columns are available.
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [4]:
@interact(hue=['smoker', 'sex', 'time', 'day'])
def plot(hue):
    """Draw a seaborn pair plot of ``tips`` coloured by the selected column.

    hue: name of the ``tips`` column used for colouring; chosen in the dropdown.
    """
    # The grid object returned by pairplot is deliberately discarded, so the
    # function itself returns None.
    sns.pairplot(data=tips, hue=hue)

interactive(children=(Dropdown(description='hue', options=('smoker', 'sex', 'time', 'day'), value='smoker'), O…

In [5]:
import pymongo
import pandas as pd

In [6]:
# Name of the MongoDB database opened by this notebook.
DATABASE_NAME = "big_data"
# Names of the collections read from that database.
STAT_TABLE = "statistics"
SLOT_TABLE = "timeslots"
WINDOW_TABLE = "window"

In [7]:
# Client for the MongoDB server at localhost:27017, and the dashboard database.
mongo_client = pymongo.MongoClient("mongodb://localhost:27017")
used_db = mongo_client.get_database(DATABASE_NAME)

# Handles on the three collections queried by the reports in this notebook.
statistics = used_db.get_collection(STAT_TABLE)
timeslots = used_db.get_collection(SLOT_TABLE)
window = used_db.get_collection(WINDOW_TABLE)

In [8]:
#my_query = statistics.find({"sensor_type": sensor_type})
#my_query = statistics.find({"measurement": {'$gt': 0}})
#res = pd.DataFrame(my_query)
#res.head()

## Basic statistics

In [9]:
from datetime import *
import time
import matplotlib.pyplot as plt

In [10]:
# One-day duration; the period options of the dropdowns are multiples of it.
day = timedelta(days=1)

# Field names of the stored documents that the reports aggregate / group on.
MEASUREMENT = "measurement"
MUNICIPALITY = "municipality"

# query 1: stats (max, min, avg) per sensor type, per spatial granularity, per time period
@interact(sensor_type=[("temperature", 0), ("humidity", 1), ("light", 2), ("movement", 3)],
          space_type=["space", "municipality", "Brussels"],
          period=[("24hrs", day), ("2 days", 2 * day), ("last week", 7 * day),
                  ("last month", 30 * day), ("last year", 365 * day)])
def report_statistics(sensor_type, space_type, period):
    """Report the max, min and mean reading of one sensor type over a recent period.

    The period ends at the newest ``time`` value stored in ``statistics``
    rather than at the wall-clock time.

    Parameters
    ----------
    sensor_type : int
        Value matched against the ``sensor_type`` field (0-3 in the dropdown).
    space_type : str
        ``"space"`` groups by the numeric id parsed from the ``p-i`` field,
        ``"municipality"`` groups by the ``municipality`` field, and any other
        value (``"Brussels"`` in the dropdown) aggregates every reading.
    period : datetime.timedelta
        Length of the window that ends at the newest stored record.

    Returns
    -------
    None
        Results are printed; the municipality view additionally draws a plot.
    """
    # Anchor the window on the newest stored record; an empty collection has none.
    latest = statistics.find_one(sort=[("time", -1)])
    if latest is None:
        print("No statistics available.")
        return
    current_date = latest["time"]

    # Readings of the requested sensor type inside (current_date - period, current_date].
    my_query = statistics.find({"sensor_type": sensor_type, "time": {'$gt': current_date - period}})
    res = pd.DataFrame(list(my_query))
    if res.empty:
        # Happens when the chosen sensor type has no record inside the window;
        # without this guard the column lookups below raise KeyError.
        print("No readings found for this selection.")
        return

    def _summarise(keys):
        # A single aggregation keeps labels and values aligned by construction
        # instead of pairing separate groupby results by position.
        return res[MEASUREMENT].groupby(keys).agg(["max", "min", "mean"])

    # case: space
    if space_type == "space":
        # Reduce "p-i" to its leading integer id by stripping trailing 0-3 digits
        # and the dash. NOTE(review): presumably "<space id>-<sensor index>" — confirm.
        space_ids = res["p-i"].map(lambda x: int(x.rstrip('0123').rstrip("-")))
        stats = _summarise(space_ids)

        print("space\tmax\t\t\tmin\t\t\tavg")
        for space_id, high, low, avg in zip(stats.index, stats["max"], stats["min"], stats["mean"]):
            print("{}\t{}\t{}\t{}".format(space_id, high, low, avg))

    # case : municipality
    elif space_type == "municipality":
        stats = _summarise(res[MUNICIPALITY])
        municipalities = list(stats.index)

        # fixed number -> can be plotted; the legend tells the three lines apart
        fig, ax = plt.subplots()
        for column in ("max", "min", "mean"):
            ax.plot(municipalities, stats[column].values, label=column)
        ax.set_xlabel("municipalities")
        ax.set_ylabel("reading")
        ax.tick_params(axis="x", labelrotation=90)
        ax.legend()
        plt.show()

        print("municipality\tmax\t\t\tmin\t\t\tavg")
        for name, high, low, avg in zip(municipalities, stats["max"], stats["min"], stats["mean"]):
            print("{}\t\t{}\t{}\t{}".format(name, high, low, avg))

    # case : Brussels
    else:
        readings = res[MEASUREMENT]
        print("max_value : {}".format(readings.max()))
        print("min_value : {}".format(readings.min()))
        print("avg_value : {}".format(readings.mean()))

interactive(children=(Dropdown(description='sensor_type', options=(('temperature', 0), ('humidity', 1), ('ligh…

## Timeslots

In [11]:
# Labels assigned to a timeslot by classifyTimeslots.
DAYTIME = "daytime"
NIGHTTIME = "nighttime"
# A slot whose mean measurement is below this value is labelled NIGHTTIME.
# NOTE(review): the name is a misspelling of "THRESHOLD"; the unit is
# presumably degrees Celsius — confirm.
TRESHOLD = 19.5

# query 2: for each temperature sensor -> characterization of the timeslots
@interact(space=["space", "municipality", "Brussels"],
          privacy=["private", "public"],
          period=[("last month", 30 * day), ("last year", 365 * day)])
def classifyTimeslots(space, privacy, period):
    """Label every timeslot as daytime or nighttime from its mean measurement.

    A slot is labelled ``NIGHTTIME`` when the mean of its readings is below
    ``TRESHOLD`` and ``DAYTIME`` otherwise. The period ends at the newest
    ``time`` value stored in ``timeslots``.
    NOTE(review): the query does not filter on sensor type; presumably the
    collection only holds temperature readings — confirm.

    Parameters
    ----------
    space : str
        ``"space"`` classifies per (space id, slot), ``"municipality"`` per
        (municipality, slot); any other value (``"Brussels"`` in the dropdown)
        classifies per slot over all readings.
    privacy : str
        Value matched against the ``type_space`` field.
    period : datetime.timedelta
        Length of the window that ends at the newest stored record.

    Returns
    -------
    None
        One line is printed per classified group.
    """
    # Anchor the window on the newest stored record; an empty collection has none.
    latest = timeslots.find_one(sort=[("time", -1)])
    if latest is None:
        print("No timeslot data available.")
        return
    current_date = latest["time"]

    # entries inside the period for the requested kind of space
    my_query = timeslots.find({"time": {'$gt': current_date - period}, "type_space": privacy})
    res = pd.DataFrame(list(my_query))
    if res.empty:
        # Without this guard the column lookups below raise KeyError.
        print("No readings found for this selection.")
        return

    def _temperature_type(readings):
        # Shared rule for all three views.
        return NIGHTTIME if readings.mean() < TRESHOLD else DAYTIME

    # case : per space
    if space == "space":
        # Reduce "p-i" to its leading integer id by stripping trailing 0-3 digits
        # and the dash. NOTE(review): presumably "<space id>-<sensor index>" — confirm.
        space_ids = res["p-i"].map(lambda x: int(x.rstrip('0123').rstrip("-")))
        per_space = res[MEASUREMENT].groupby([space_ids, res["slot"]])

        print("sensor\tslot\ttemperature type")
        for (sensor, slot), readings in per_space:
            print("{}\t{}\t{}".format(sensor, slot, _temperature_type(readings)))

    # case : per municipality
    elif space == "municipality":
        per_municipality = res.groupby([MUNICIPALITY, "slot"])[MEASUREMENT]

        print("municipality\tslot\ttemperature type")
        for (municipality, slot), readings in per_municipality:
            print("{}\t\t{}\t{}".format(municipality, slot, _temperature_type(readings)))

    # case : Brussels
    else:
        # Print inside the loop: previously only the last slot's label was
        # reported, and an empty result raised NameError. Grouping by a scalar
        # key keeps `slot` a plain value on every pandas version.
        print("slot\ttemperature type")
        for slot, readings in res.groupby("slot")[MEASUREMENT]:
            print("{}\t{}".format(slot, _temperature_type(readings)))

interactive(children=(Dropdown(description='space', options=('space', 'municipality', 'Brussels'), value='spac…

## Sliding window

In [12]:
@interact(reset=["reset"])
def slidingWindow(reset):
    """Print the frequency of each measurement value in the latest window.

    The "window" operation has already been applied upstream: every record
    whose ``time`` equals the newest ``time`` in ``window`` belongs to the
    last 24h window.

    Parameters
    ----------
    reset : str
        Unused; it only gives ``interact`` a widget to render.

    Returns
    -------
    None
        One ``measurement<TAB>frequency`` line is printed per distinct
        measurement, with the measurement rounded to one decimal.
    """
    # define current date as the last date recorded in the database
    latest = window.find_one(sort=[("time", -1)])
    if latest is None:
        print("No window data available.")
        return
    current_date = latest["time"]

    my_query = window.find({"time": current_date})
    res = pd.DataFrame(list(my_query))

    # Group by a scalar key so `measure` is a plain number (a length-one list
    # yields 1-tuples on pandas >= 2.0, which round() rejects). Summing gives
    # the same value as before for single-row groups and also handles
    # measurements that occur in several records.
    frequencies = res.groupby(MEASUREMENT)["frequency"].sum()

    for measure, frequency in frequencies.items():
        print("{}\t{}".format(round(measure, 1), float(frequency)))

#slidingWindow("test")
    

interactive(children=(Dropdown(description='reset', options=('reset',), value='reset'), Output()), _dom_classe…