In [None]:
import findspark
findspark.init()

import datetime
import pyparsing as pp
import ipywidgets as ipy

from pyspark import sql, SparkConf, SparkContext
from pyspark.sql.functions import col, to_date, avg
from pyspark.sql.functions import max as sparkMax
from pyspark.sql.functions import min as sparkMin
from IPython.display import display, clear_output
from ipywidgets import Output, VBox, widgets, interact

In [None]:
# Spark entry points for the notebook.
conf = SparkConf().setAppName("Read_CSV")
# getOrCreate reuses the context that is already running when this cell is
# executed again in the same kernel; constructing a second SparkContext
# directly is rejected by Spark.
sc = SparkContext.getOrCreate(conf=conf)
sql_context = sql.SQLContext(sc)

# The first line of the file is used as column names. No schema is supplied or
# inferred here, so the reader is left on its defaults.
df_pollution = sql_context.read.csv("Luchtvervuiling.csv", header=True)

In [None]:
# Options of the month filters. Index 0 is the "all months" entry, so every
# real month sits at the index equal to its month number (january -> 1).
MONTHS_WITH_OPTION_ALL = [
    "all months",
    "january",
    "february",
    "march",
    "april",
    "may",
    "june",
    "july",
    "august",
    "september",
    "october",
    "november",
    "december",
]
# Measurement columns of df_pollution that can be picked in the type filter.
TYPES_P = ["so2", "no2", "rspm", "spm"]

In [None]:
# Build the list of distinct years found in the date column; it feeds the
# year filters. distinct() runs before collect() so only the unique dates are
# shipped to the driver instead of one row per measurement.
date_rows = (
    df_pollution
    .select(to_date(df_pollution.date).alias("to_date"))
    .distinct()
    .collect()
)
# Rows whose to_date value is None have no year to contribute. They are
# skipped explicitly, so any other error is no longer silently swallowed.
# A set keeps the de-duplication linear; newest year comes first.
years = sorted(
    {row.to_date.year for row in date_rows if row.to_date is not None},
    reverse=True,
)

# Build the alphabetically sorted list of distinct locations; it feeds the
# city filters.
location_rows = df_pollution.select("location").distinct().collect()
locations = sorted(str(row["location"]) for row in location_rows)

In [None]:
# Filter widgets for the full measurements table. Each dropdown starts on the
# first entry of its option list.
city_table_filter = widgets.Dropdown(
    description="City:",
    options=locations,
    value=locations[0],
    disabled=False,
)
year_table_filter = widgets.Dropdown(
    description="Year:",
    options=years,
    value=years[0],
    disabled=False,
)
month_table_filter = widgets.Dropdown(
    description="Month:",
    options=MONTHS_WITH_OPTION_ALL,
    value=MONTHS_WITH_OPTION_ALL[0],
    disabled=False,
)

# Show the three filters next to each other.
display(
    widgets.HBox(
        (city_table_filter, year_table_filter, month_table_filter)
    )
)

def update_table():
    """Print all measurements matching the table filters.

    Reads the current values of ``city_table_filter``, ``year_table_filter``
    and ``month_table_filter``, restricts ``df_pollution`` to the selected
    location and period, and prints every matching row sorted by date. When
    no row matches, a message naming the selected city and period is printed
    instead.

    Returns:
        None. The result is written to the cell output.
    """
    # Current filter values.
    selected_city = city_table_filter.value
    selected_year = str(year_table_filter.value)
    selected_month = month_table_filter.value
    all_months = str(selected_month) == "all months"

    # Inclusive bounds of the selected period, as "yyyy-mm-dd" text.
    if all_months:
        first_day = selected_year + "-01-01"
        last_day = selected_year + "-12-31"
    else:
        # The position in MONTHS_WITH_OPTION_ALL is the month number because
        # index 0 holds "all months"; zfill pads it to two digits.
        month = str(MONTHS_WITH_OPTION_ALL.index(selected_month)).zfill(2)
        first_day = selected_year + "-" + month + "-01"
        # "-31" is used as the upper bound for every month.
        # NOTE(review): df_pollution is read without a schema, so "date" is
        # presumably a string column and this is a text comparison - confirm
        # the file stores dates as yyyy-mm-dd.
        last_day = selected_year + "-" + month + "-31"

    # Columns to show, restricted to the selected city and period.
    df_table = (
        df_pollution
        .select("date", "state", "location", "type", "so2", "no2", "rspm", "spm")
        .filter(
            col("date").between(first_day, last_day)
            & (col("location") == selected_city)
        )
        .sort(col("date"))
    )

    # Count once and reuse the result: every count() is a separate Spark job.
    row_count = df_table.count()
    if row_count == 0:
        no_measurements_error = "No measurements have taken place at " + selected_city + " during "
        error_ending_str = "the year " + selected_year + "."
        if all_months:
            no_measurements_error += error_ending_str
        else:
            no_measurements_error += selected_month + " of " + error_ending_str
        print(no_measurements_error)
    else:
        # Passing the row count makes show() print every row, not just the
        # default first page.
        df_table.show(row_count)

# Initial render with the default filter values.
update_table()


def on_change_table_filter(change):
    """Redraw the filters and the table after a filter value has changed.

    Args:
        change: The change dictionary passed by the widget's ``observe``
            mechanism; its "name", "old" and "new" entries are read.
    """
    if change["name"] == "value" and (change["new"] != change["old"]):
        # clear_output() also removes the filter widgets from the cell
        # output, so they are displayed again before the table is printed.
        clear_output()
        display(widgets.HBox((city_table_filter, year_table_filter, month_table_filter)))
        update_table()


# Link the filters to on_change_table_filter. names="value" subscribes to the
# "value" trait only, so the handler is not invoked for other trait changes.
# The name check inside the handler is kept so it stays safe when registered
# without that restriction.
city_table_filter.observe(on_change_table_filter, names="value")
year_table_filter.observe(on_change_table_filter, names="value")
month_table_filter.observe(on_change_table_filter, names="value")

In [None]:
# Filter widgets for the summary (average / maximum / minimum) table. Each
# dropdown starts on the first entry of its option list.
city_short_table_filter = widgets.Dropdown(
    description="City:",
    options=locations,
    value=locations[0],
    disabled=False,
)
yearshort_table_filter = widgets.Dropdown(
    description="Year:",
    options=years,
    value=years[0],
    disabled=False,
)
monthshort_table_filter = widgets.Dropdown(
    description="Month:",
    options=MONTHS_WITH_OPTION_ALL,
    value=MONTHS_WITH_OPTION_ALL[0],
    disabled=False,
)
types_p_short_table_filter = widgets.Dropdown(
    description="Type:",
    options=TYPES_P,
    value=TYPES_P[0],
    disabled=False,
)

# Two rows of filters: city and type on top, year and month below.
display(
    widgets.HBox(
        (city_short_table_filter, types_p_short_table_filter)
    )
)
display(
    widgets.HBox(
        (yearshort_table_filter, monthshort_table_filter)
    )
)

def update_short_table():
    """Print average, maximum and minimum of one measurement type.

    Reads the current values of ``city_short_table_filter``,
    ``yearshort_table_filter``, ``monthshort_table_filter`` and
    ``types_p_short_table_filter``, restricts ``df_pollution`` to the selected
    location and period, drops rows whose selected measurement is "NA", and
    prints a one-row table with the average, maximum and minimum. When no row
    matches, a message naming the selection is printed instead.

    Returns:
        None. The result is written to the cell output.
    """
    # Current filter values.
    selected_city = city_short_table_filter.value
    selected_year = str(yearshort_table_filter.value)
    selected_month = monthshort_table_filter.value
    selected_p_type = types_p_short_table_filter.value
    all_months = str(selected_month) == "all months"

    # Inclusive bounds of the selected period, as "yyyy-mm-dd" text.
    if all_months:
        first_day = selected_year + "-01-01"
        last_day = selected_year + "-12-31"
    else:
        # The position in MONTHS_WITH_OPTION_ALL is the month number because
        # index 0 holds "all months"; zfill pads it to two digits.
        month = str(MONTHS_WITH_OPTION_ALL.index(selected_month)).zfill(2)
        first_day = selected_year + "-" + month + "-01"
        # "-31" is used as the upper bound for every month.
        last_day = selected_year + "-" + month + "-31"

    # Rows for the selected city and period that carry a value for the
    # selected measurement type. No sort is applied: the count and the
    # aggregates below do not depend on row order.
    df_short_table = (
        df_pollution
        .select("date", "state", "location", "type", selected_p_type)
        .filter(
            col("date").between(first_day, last_day)
            & (col("location") == selected_city)
            & (col(selected_p_type) != "NA")
        )
    )

    if df_short_table.count() == 0:
        no_measurements_error = "No measurements of type " + selected_p_type + " have taken place at " + selected_city + " during "
        error_ending_str = "the year " + selected_year + "."
        if all_months:
            no_measurements_error += error_ending_str
        else:
            no_measurements_error += selected_month + " of " + error_ending_str
        print(no_measurements_error)
        return

    # df_pollution is loaded from CSV without a schema, so the measurement
    # column holds text. max/min on text compare character by character
    # (e.g. "9.5" would beat "10.2"), so the values are cast to double first.
    # Values that cannot be cast become NULL and are ignored by the aggregates.
    measurement = col(selected_p_type).cast("double")
    short_table = df_short_table.agg(
        avg(measurement).alias("Average " + selected_p_type),
        sparkMax(measurement).alias("Maximum " + selected_p_type),
        sparkMin(measurement).alias("Minimum " + selected_p_type),
    )
    short_table.show()

# Initial render with the default filter values.
update_short_table()


def on_change_short_table_filter(change):
    """Redraw the filters and the summary table after a filter value has changed.

    Args:
        change: The change dictionary passed by the widget's ``observe``
            mechanism; its "name", "old" and "new" entries are read.
    """
    if change["name"] == "value" and (change["new"] != change["old"]):
        # clear_output() also removes the filter widgets from the cell
        # output, so both filter rows are displayed again before the table.
        clear_output()
        display(widgets.HBox((city_short_table_filter, types_p_short_table_filter)))
        display(widgets.HBox((yearshort_table_filter, monthshort_table_filter)))
        update_short_table()


# Link the filters to on_change_short_table_filter. names="value" subscribes
# to the "value" trait only, so the handler is not invoked for other trait
# changes. The name check inside the handler is kept so it stays safe when
# registered without that restriction.
city_short_table_filter.observe(on_change_short_table_filter, names="value")
yearshort_table_filter.observe(on_change_short_table_filter, names="value")
monthshort_table_filter.observe(on_change_short_table_filter, names="value")
types_p_short_table_filter.observe(on_change_short_table_filter, names="value")